summaryrefslogtreecommitdiffstats
path: root/windows
diff options
context:
space:
mode:
author Scott Wegner <swegner@hdfgroup.org> 2008-04-14 14:59:52 (GMT)
committer Scott Wegner <swegner@hdfgroup.org> 2008-04-14 14:59:52 (GMT)
commit e622c634d5fe078c69e02c91d1054529eaa55b05 (patch)
tree 0a1c44d5362fb3b4f35af022f5f811095522ac0e /windows
parent 740769d37f3ff0689140ce77e26530a64674d549 (diff)
downloadhdf5-e622c634d5fe078c69e02c91d1054529eaa55b05.zip
hdf5-e622c634d5fe078c69e02c91d1054529eaa55b05.tar.gz
hdf5-e622c634d5fe078c69e02c91d1054529eaa55b05.tar.bz2
[svn-r14835] Purpose: Remove English-specific code from Windows test scripts
Description: In previous versions of Windows, the built-in 'FC' command (the diff equivalent) didn't return a proper exit status. As a workaround, we parsed the message it printed to determine the result, which relies on English-language messages. On current Windows XP and Windows Vista, FC returns its exit status as expected, so we can remove this workaround. Older platforms that would need the workaround are no longer supported.
Tested: VS2005 on Windows XP; small test on Windows Vista.
Diffstat (limited to 'windows')
-rwxr-xr-x windows/hdf5bt.BAT 1
-rw-r--r-- windows/test/testerror.bat 6
-rw-r--r-- windows/tools/h5copy/testh5copy.bat 4
-rw-r--r-- windows/tools/h5diff/testh5diff.bat 2
-rw-r--r-- windows/tools/h5dump/testh5dump.bat 4
-rw-r--r-- windows/tools/h5dump/testh5dumpxml.bat 4
-rw-r--r-- windows/tools/h5import/h5importtestutil.bat 4
-rw-r--r-- windows/tools/h5jam/testh5jam.bat 4
-rw-r--r-- windows/tools/h5ls/testh5ls.bat 2
-rw-r--r-- windows/tools/h5mkgrp/testh5mkgrp.bat 4
-rw-r--r-- windows/tools/h5stat/testh5stat.bat 4
11 files changed, 20 insertions, 19 deletions
diff --git a/windows/hdf5bt.BAT b/windows/hdf5bt.BAT
index 08f48c4..c03027a 100755
--- a/windows/hdf5bt.BAT
+++ b/windows/hdf5bt.BAT
@@ -45,6 +45,7 @@ setlocal enabledelayedexpansion
pushd %~dp0
set nerrors=0
+if "%1"=="/?" goto help
goto main
rem Print a help message
diff --git a/windows/test/testerror.bat b/windows/test/testerror.bat
index 3dad50b..7204d99 100644
--- a/windows/test/testerror.bat
+++ b/windows/test/testerror.bat
@@ -141,11 +141,11 @@ rem
)
)
- fc /w %expect1_parsed% %actual% | find "FC: no diff" > nul
+ fc /w %expect1_parsed% %actual% > nul
if %errorlevel% equ 0 (
call :testing PASSED %test_err%
) else (
- fc /w %expect2_parsed% %actual% | find "FC: no diff" > nul
+ fc /w %expect2_parsed% %actual% > nul
if !errorlevel! equ 0 (
call :testing PASSED %test_err%
) else (
@@ -195,4 +195,4 @@ rem ############################################################################
popd
endlocal & exit /b %nerrors%
- \ No newline at end of file
+
diff --git a/windows/tools/h5copy/testh5copy.bat b/windows/tools/h5copy/testh5copy.bat
index 8fc9f67..7b3d034 100644
--- a/windows/tools/h5copy/testh5copy.bat
+++ b/windows/tools/h5copy/testh5copy.bat
@@ -211,7 +211,7 @@ rem rem Create the expect file if it doesn't yet exist
rem call :verify_h5ls CREATED %*
rem copy %actual% %expect%
rem ) else (
- fc %expect_parsed% %actual_parsed% | find "FC: no diff" > nul
+ fc %expect_parsed% %actual_parsed% > nul
if %errorlevel% equ 0 (
call :verify_h5ls PASSED %*
) else (
@@ -305,4 +305,4 @@ rem ############################################################################
popd
endlocal & exit /b %nerrors%
- \ No newline at end of file
+
diff --git a/windows/tools/h5diff/testh5diff.bat b/windows/tools/h5diff/testh5diff.bat
index fba7ef0..d3c407a 100644
--- a/windows/tools/h5diff/testh5diff.bat
+++ b/windows/tools/h5diff/testh5diff.bat
@@ -141,7 +141,7 @@ rem
call :testing CREATED %h5diff% %params%
copy /y %actual% %expect% > nul
) else (
- fc /w %expect% %actual% | find "FC: no diff" > nul
+ fc /w %expect% %actual% > nul
if !errorlevel! equ 0 (
call :testing PASSED %h5diff% %params%
) else (
diff --git a/windows/tools/h5dump/testh5dump.bat b/windows/tools/h5dump/testh5dump.bat
index f26a5d9..b8e8210 100644
--- a/windows/tools/h5dump/testh5dump.bat
+++ b/windows/tools/h5dump/testh5dump.bat
@@ -124,7 +124,7 @@ rem
call :testing CREATED %params%
copy /y %actual% %expect% > nul
) else (
- fc /w %expect% %actual% | find "FC: no diff" > nul
+ fc /w %expect% %actual% > nul
if !errorlevel! equ 0 (
call :testing PASSED %params%
) else (
@@ -176,7 +176,7 @@ rem use for the binary tests that expect a full path in -o
call :testing CREATED %params%
copy /y %actual% %expect% > nul
) else (
- fc /w %expect% %actual% | find "FC: no diff" > nul
+ fc /w %expect% %actual% > nul
if !errorlevel! equ 0 (
call :testing PASSED %params%
) else (
diff --git a/windows/tools/h5dump/testh5dumpxml.bat b/windows/tools/h5dump/testh5dumpxml.bat
index 7fd04f3..1b7893e 100644
--- a/windows/tools/h5dump/testh5dumpxml.bat
+++ b/windows/tools/h5dump/testh5dumpxml.bat
@@ -89,7 +89,7 @@ rem
call :testing CREATED %params%
copy %actual% %expect% > nul
) else (
- fc /w %expect% %actual% | find "FC: no diff" > nul
+ fc /w %expect% %actual% > nul
if !errorlevel! equ 0 (
call :testing PASSED %params%
) else (
@@ -212,4 +212,4 @@ rem ############################################################################
popd
endlocal & exit /b %nerrors%
- \ No newline at end of file
+
diff --git a/windows/tools/h5import/h5importtestutil.bat b/windows/tools/h5import/h5importtestutil.bat
index c2efe87..e330c41 100644
--- a/windows/tools/h5import/h5importtestutil.bat
+++ b/windows/tools/h5import/h5importtestutil.bat
@@ -67,7 +67,7 @@ goto main
%h5dump_bin% %5 > log1
popd
- fc /w tmp_testfiles\log1 log2 | find "FC: no diff" > nul
+ fc /w tmp_testfiles\log1 log2 > nul
if %errorlevel% neq 0 set err=1
del /f log2 tmp_testfiles\log1
if "%err%"=="1" (
@@ -169,4 +169,4 @@ goto main
popd
endlocal & exit /b %errors%
- \ No newline at end of file
+
diff --git a/windows/tools/h5jam/testh5jam.bat b/windows/tools/h5jam/testh5jam.bat
index d3522f1..9a9c5d4 100644
--- a/windows/tools/h5jam/testh5jam.bat
+++ b/windows/tools/h5jam/testh5jam.bat
@@ -249,7 +249,7 @@ rem
rem Compare to 'cmpfile', result is set in result1
set tfile=tt1
%getub_bin% -c %size% %hfile% > %tfile%
- fc /w %cmpfile% %tfile% | find "FC: no diff" > nul
+ fc /w %cmpfile% %tfile% > nul
if %errorlevel% neq 0 (
fc /w %cmpfile% %file%
set result1=1
@@ -595,4 +595,4 @@ rem ############################################################################
popd
endlocal & exit /b %nerrors%
- \ No newline at end of file
+
diff --git a/windows/tools/h5ls/testh5ls.bat b/windows/tools/h5ls/testh5ls.bat
index 0329447..3baeda8 100644
--- a/windows/tools/h5ls/testh5ls.bat
+++ b/windows/tools/h5ls/testh5ls.bat
@@ -124,7 +124,7 @@ rem %2 and on -- argument for the h5ls tool
rem call :testing CREATED %params%
rem copy %actual% %expect% > nul
) else (
- fc /w %expect% %actual% | find "FC: no diff" > nul
+ fc /w %expect% %actual% > nul
if !errorlevel! equ 0 (
call :testing PASSED %params%
) else (
diff --git a/windows/tools/h5mkgrp/testh5mkgrp.bat b/windows/tools/h5mkgrp/testh5mkgrp.bat
index 112c106..15de2cd 100644
--- a/windows/tools/h5mkgrp/testh5mkgrp.bat
+++ b/windows/tools/h5mkgrp/testh5mkgrp.bat
@@ -148,7 +148,7 @@ rem
rem copy %actual% %expect%
rem )
- fc /w %expect_parsed% %expect_parsed% | find "FC: no diff" > nul
+ fc /w %expect_parsed% %expect_parsed% > nul
if %errorlevel% equ 0 (
call :verify_h5ls PASSED %*
) else (
@@ -251,4 +251,4 @@ rem ############################################################################
popd
endlocal & exit /b %nerrors%
- \ No newline at end of file
+
diff --git a/windows/tools/h5stat/testh5stat.bat b/windows/tools/h5stat/testh5stat.bat
index 3bc6ab0..06ab4ba 100644
--- a/windows/tools/h5stat/testh5stat.bat
+++ b/windows/tools/h5stat/testh5stat.bat
@@ -99,7 +99,7 @@ rem
call :testing CREATED %stat% %params%
copy /y %actual% %expect%
) else (
- fc /w %expect% %actual% | find "FC: no diff" > nul
+ fc /w %expect% %actual% > nul
if !errorlevel! equ 0 (
call :testing PASSED %stat% %params%
) else (
@@ -173,4 +173,4 @@ rem ############################################################################
popd
endlocal & exit /b %nerrors%
- \ No newline at end of file
+
-rw-r--r--Doc/library/asyncore.rst13
-rw-r--r--Doc/library/atexit.rst21
-rw-r--r--Doc/library/base64.rst11
-rw-r--r--Doc/library/binary.rst23
-rw-r--r--Doc/library/binascii.rst9
-rw-r--r--Doc/library/bz2.rst249
-rw-r--r--Doc/library/chunk.rst5
-rw-r--r--Doc/library/cmd.rst4
-rw-r--r--Doc/library/codecs.rst34
-rw-r--r--Doc/library/collections.abc.rst182
-rw-r--r--Doc/library/collections.rst1164
-rw-r--r--Doc/library/concurrency.rst32
-rw-r--r--Doc/library/concurrent.futures.rst38
-rw-r--r--Doc/library/concurrent.rst6
-rw-r--r--Doc/library/contextlib.rst347
-rw-r--r--Doc/library/copyreg.rst8
-rw-r--r--Doc/library/crypt.rst123
-rw-r--r--Doc/library/crypto.rst1
-rw-r--r--Doc/library/csv.rst18
-rw-r--r--Doc/library/ctypes.rst115
-rw-r--r--Doc/library/curses.rst36
-rw-r--r--Doc/library/datatypes.rst3
-rw-r--r--Doc/library/datetime.rst128
-rw-r--r--Doc/library/debug.rst3
-rw-r--r--Doc/library/decimal.rst184
-rw-r--r--Doc/library/depgraph-output.pngbin0 -> 24719 bytes-rw-r--r--Doc/library/development.rst3
-rw-r--r--Doc/library/difflib.rst4
-rw-r--r--Doc/library/dis.rst19
-rw-r--r--Doc/library/distutils.rst12
-rw-r--r--Doc/library/doctest.rst68
-rw-r--r--Doc/library/email.errors.rst21
-rw-r--r--Doc/library/email.generator.rst65
-rw-r--r--Doc/library/email.header.rst6
-rw-r--r--Doc/library/email.headerregistry.rst452
-rw-r--r--Doc/library/email.message.rst8
-rw-r--r--Doc/library/email.mime.rst6
-rw-r--r--Doc/library/email.parser.rst81
-rw-r--r--Doc/library/email.policy.rst497
-rw-r--r--Doc/library/email.rst2
-rw-r--r--Doc/library/email.util.rst57
-rw-r--r--Doc/library/exceptions.rst247
-rw-r--r--Doc/library/faulthandler.rst136
-rw-r--r--Doc/library/fcntl.rst9
-rw-r--r--Doc/library/filecmp.rst21
-rw-r--r--Doc/library/fileinput.rst9
-rw-r--r--Doc/library/ftplib.rst65
-rw-r--r--Doc/library/functions.rst213
-rw-r--r--Doc/library/functools.rst18
-rw-r--r--Doc/library/gc.rst39
-rw-r--r--Doc/library/gettext.rst7
-rw-r--r--Doc/library/gzip.rst51
-rw-r--r--Doc/library/hmac.rst35
-rw-r--r--Doc/library/html.entities.rst23
-rw-r--r--Doc/library/html.parser.rst24
-rw-r--r--Doc/library/html.rst7
-rw-r--r--Doc/library/http.client.rst21
-rw-r--r--Doc/library/http.cookiejar.rst11
-rw-r--r--Doc/library/http.cookies.rst5
-rw-r--r--Doc/library/http.rst11
-rw-r--r--Doc/library/http.server.rst59
-rw-r--r--Doc/library/imaplib.rst27
-rw-r--r--Doc/library/imp.rst128
-rw-r--r--Doc/library/importlib.rst501
-rw-r--r--Doc/library/index.rst5
-rw-r--r--Doc/library/inspect.rst370
-rw-r--r--Doc/library/internet.rst4
-rw-r--r--Doc/library/io.rst166
-rw-r--r--Doc/library/ipaddress.rst804
-rw-r--r--Doc/library/ipc.rst6
-rw-r--r--Doc/library/itertools.rst51
-rw-r--r--Doc/library/locale.rst7
-rw-r--r--Doc/library/logging.handlers.rst109
-rw-r--r--Doc/library/logging.rst41
-rw-r--r--Doc/library/lzma.rst382
-rw-r--r--Doc/library/markup.rst9
-rw-r--r--Doc/library/math.rst13
-rw-r--r--Doc/library/mmap.rst12
-rw-r--r--Doc/library/msvcrt.rst15
-rw-r--r--Doc/library/multiprocessing.rst375
-rw-r--r--Doc/library/nntplib.rst16
-rw-r--r--Doc/library/numeric.rst6
-rw-r--r--Doc/library/os.path.rst14
-rw-r--r--Doc/library/os.rst1482
-rw-r--r--Doc/library/ossaudiodev.rst14
-rw-r--r--Doc/library/pdb.rst5
-rw-r--r--Doc/library/pickle.rst61
-rw-r--r--Doc/library/pkgutil.rst85
-rw-r--r--Doc/library/platform.rst22
-rw-r--r--Doc/library/random.rst9
-rw-r--r--Doc/library/re.rst20
-rw-r--r--Doc/library/readline.rst4
-rw-r--r--Doc/library/resource.rst8
-rw-r--r--Doc/library/sched.rst66
-rw-r--r--Doc/library/select.rst113
-rw-r--r--Doc/library/shlex.rst37
-rw-r--r--Doc/library/shutil.rst295
-rw-r--r--Doc/library/signal.rst169
-rw-r--r--Doc/library/site.rst33
-rw-r--r--Doc/library/smtpd.rst20
-rw-r--r--Doc/library/smtplib.rst65
-rw-r--r--Doc/library/socket.rst414
-rw-r--r--Doc/library/socketserver.rst20
-rw-r--r--Doc/library/someos.rst24
-rw-r--r--Doc/library/sqlite3.rst16
-rw-r--r--Doc/library/ssl.rst269
-rw-r--r--Doc/library/stat.rst11
-rw-r--r--Doc/library/stdtypes.rst1651
-rw-r--r--Doc/library/string.rst2
-rw-r--r--Doc/library/strings.rst27
-rw-r--r--Doc/library/struct.rst21
-rw-r--r--Doc/library/subprocess.rst206
-rw-r--r--Doc/library/sys.rst234
-rw-r--r--Doc/library/syslog.rst10
-rw-r--r--Doc/library/tarfile.rst44
-rw-r--r--Doc/library/telnetlib.rst6
-rw-r--r--Doc/library/tempfile.rst8
-rw-r--r--Doc/library/test.rst167
-rw-r--r--Doc/library/text.rst26
-rw-r--r--Doc/library/textwrap.rst44
-rw-r--r--Doc/library/threading.rst393
-rw-r--r--Doc/library/time.rst227
-rw-r--r--Doc/library/timeit.rst33
-rw-r--r--Doc/library/tkinter.rst64
-rw-r--r--Doc/library/tokenize.rst108
-rw-r--r--Doc/library/types.rst151
-rw-r--r--Doc/library/unicodedata.rst13
-rw-r--r--Doc/library/unittest.mock-examples.rst1246
-rw-r--r--Doc/library/unittest.mock.rst2225
-rw-r--r--Doc/library/unittest.rst80
-rw-r--r--Doc/library/urllib.error.rst12
-rw-r--r--Doc/library/urllib.parse.rst13
-rw-r--r--Doc/library/urllib.request.rst172
-rw-r--r--Doc/library/urllib.rst9
-rw-r--r--Doc/library/venv.rst180
-rw-r--r--Doc/library/warnings.rst3
-rw-r--r--Doc/library/webbrowser.rst97
-rw-r--r--Doc/library/winreg.rst51
-rw-r--r--Doc/library/wsgiref.rst3
-rw-r--r--Doc/library/xml.dom.minidom.rst7
-rw-r--r--Doc/library/xml.etree.elementtree.rst502
-rw-r--r--Doc/library/xml.rst29
-rw-r--r--Doc/library/xml.sax.utils.rst2
-rw-r--r--Doc/library/xmlrpc.client.rst53
-rw-r--r--Doc/library/xmlrpc.rst12
-rw-r--r--Doc/library/xmlrpc.server.rst28
-rw-r--r--Doc/library/zipfile.rst36
-rw-r--r--Doc/library/zipimport.rst5
-rw-r--r--Doc/library/zlib.rst105
-rw-r--r--Doc/license.rst2
-rw-r--r--Doc/make.bat2
-rw-r--r--Doc/reference/datamodel.rst239
-rw-r--r--Doc/reference/expressions.rst33
-rw-r--r--Doc/reference/import.rst697
-rw-r--r--Doc/reference/index.rst1
-rw-r--r--Doc/reference/lexical_analysis.rst41
-rw-r--r--Doc/reference/simple_stmts.rst236
-rw-r--r--Doc/tools/sphinxext/indexsidebar.html2
-rw-r--r--Doc/tools/sphinxext/layout.html59
-rw-r--r--Doc/tools/sphinxext/patchlevel.py6
-rw-r--r--Doc/tools/sphinxext/pyspecific.py49
-rw-r--r--Doc/tools/sphinxext/susp-ignored.csv384
-rw-r--r--Doc/tutorial/classes.rst5
-rw-r--r--Doc/tutorial/datastructures.rst24
-rw-r--r--Doc/tutorial/interpreter.rst14
-rw-r--r--Doc/tutorial/introduction.rst2
-rw-r--r--Doc/tutorial/stdlib.rst2
-rw-r--r--Doc/tutorial/stdlib2.rst6
-rw-r--r--Doc/using/cmdline.rst55
-rw-r--r--Doc/using/index.rst2
-rw-r--r--Doc/using/scripts.rst12
-rw-r--r--Doc/using/venv-create.inc85
-rw-r--r--Doc/using/windows.rst300
-rw-r--r--Doc/whatsnew/2.0.rst10
-rw-r--r--Doc/whatsnew/2.1.rst8
-rw-r--r--Doc/whatsnew/2.2.rst22
-rw-r--r--Doc/whatsnew/2.3.rst40
-rw-r--r--Doc/whatsnew/2.4.rst44
-rw-r--r--Doc/whatsnew/2.5.rst58
-rw-r--r--Doc/whatsnew/3.0.rst2
-rw-r--r--Doc/whatsnew/3.2.rst8
-rw-r--r--Doc/whatsnew/3.3.rst2511
-rw-r--r--Doc/whatsnew/changelog.rst6
-rw-r--r--Doc/whatsnew/index.rst9
-rw-r--r--Grammar/Grammar5
-rw-r--r--Include/Python-ast.h61
-rw-r--r--Include/Python.h39
-rw-r--r--Include/abstract.h40
-rw-r--r--Include/asdl.h9
-rw-r--r--Include/ast.h1
-rw-r--r--Include/bytesobject.h2
-rw-r--r--Include/code.h6
-rw-r--r--Include/codecs.h2
-rw-r--r--Include/compile.h14
-rw-r--r--Include/complexobject.h9
-rw-r--r--Include/datetime.h6
-rw-r--r--Include/descrobject.h1
-rw-r--r--Include/dictobject.h94
-rw-r--r--Include/dtoa.h2
-rw-r--r--Include/errcode.h1
-rw-r--r--Include/fileutils.h2
-rw-r--r--Include/floatobject.h23
-rw-r--r--Include/frameobject.h46
-rw-r--r--Include/funcobject.h2
-rw-r--r--Include/genobject.h24
-rw-r--r--Include/graminit.h1
-rw-r--r--Include/import.h34
-rw-r--r--Include/intrcheck.h6
-rw-r--r--Include/listobject.h3
-rw-r--r--Include/longintrepr.h2
-rw-r--r--Include/longobject.h28
-rw-r--r--Include/memoryobject.h84
-rw-r--r--Include/methodobject.h24
-rw-r--r--Include/modsupport.h5
-rw-r--r--Include/moduleobject.h4
-rw-r--r--Include/namespaceobject.h17
-rw-r--r--Include/node.h1
-rw-r--r--Include/object.h70
-rw-r--r--Include/objimpl.h13
-rw-r--r--Include/opcode.h188
-rw-r--r--Include/parsetok.h9
-rw-r--r--Include/patchlevel.h10
-rw-r--r--Include/py_curses.h1
-rw-r--r--Include/pydebug.h3
-rw-r--r--Include/pyerrors.h85
-rw-r--r--Include/pyexpat.h2
-rw-r--r--Include/pymacro.h69
-rw-r--r--Include/pymath.h6
-rw-r--r--Include/pyport.h2
-rw-r--r--Include/pystate.h18
-rw-r--r--Include/pythonrun.h16
-rw-r--r--Include/pythread.h4
-rw-r--r--Include/pytime.h46
-rw-r--r--Include/setobject.h3
-rw-r--r--Include/structmember.h65
-rw-r--r--Include/symtable.h11
-rw-r--r--Include/timefuncs.h25
-rw-r--r--Include/traceback.h38
-rw-r--r--Include/tupleobject.h3
-rw-r--r--Include/ucnhash.h6
-rw-r--r--Include/unicodeobject.h1116
-rw-r--r--LICENSE1
-rw-r--r--Lib/_dummy_thread.py6
-rw-r--r--Lib/_osx_support.py488
-rw-r--r--Lib/_pyio.py189
-rw-r--r--Lib/_strptime.py6
-rw-r--r--Lib/abc.py27
-rw-r--r--Lib/aifc.py12
-rw-r--r--Lib/argparse.py94
-rw-r--r--Lib/ast.py1
-rw-r--r--Lib/asynchat.py16
-rw-r--r--Lib/asyncore.py18
-rwxr-xr-xLib/base64.py50
-rw-r--r--Lib/binhex.py1
-rw-r--r--Lib/bz2.py504
-rwxr-xr-xLib/cgi.py13
-rw-r--r--Lib/cgitb.py1
-rw-r--r--Lib/code.py19
-rw-r--r--Lib/codecs.py21
-rw-r--r--Lib/collections/__init__.py (renamed from Lib/collections.py)168
-rw-r--r--Lib/collections/__main__.py38
-rw-r--r--Lib/collections/abc.py (renamed from Lib/_abcoll.py)51
-rw-r--r--Lib/concurrent/futures/_base.py27
-rw-r--r--Lib/concurrent/futures/process.py136
-rw-r--r--Lib/concurrent/futures/thread.py2
-rw-r--r--Lib/configparser.py23
-rw-r--r--Lib/contextlib.py126
-rw-r--r--Lib/copy.py77
-rw-r--r--Lib/crypt.py62
-rw-r--r--Lib/ctypes/__init__.py4
-rw-r--r--Lib/ctypes/test/test_callbacks.py2
-rw-r--r--Lib/ctypes/test/test_memfunctions.py2
-rw-r--r--Lib/ctypes/test/test_parameters.py9
-rw-r--r--Lib/ctypes/test/test_pep3118.py76
-rw-r--r--Lib/ctypes/test/test_python_api.py3
-rw-r--r--Lib/ctypes/test/test_refcounts.py3
-rw-r--r--Lib/ctypes/test/test_stringptr.py2
-rw-r--r--Lib/ctypes/test/test_win32.py22
-rw-r--r--Lib/ctypes/util.py37
-rw-r--r--Lib/curses/__init__.py46
-rw-r--r--Lib/curses/wrapper.py50
-rw-r--r--Lib/datetime.py75
-rw-r--r--Lib/decimal.py337
-rw-r--r--Lib/difflib.py23
-rw-r--r--Lib/dis.py6
-rw-r--r--Lib/distutils/__init__.py2
-rw-r--r--Lib/distutils/command/bdist_wininst.py6
-rw-r--r--Lib/distutils/command/build_ext.py11
-rw-r--r--Lib/distutils/command/build_scripts.py7
-rw-r--r--Lib/distutils/command/wininst-10.0-amd64.exebin0 -> 222208 bytes-rw-r--r--Lib/distutils/command/wininst-10.0.exebin0 -> 190976 bytes-rw-r--r--Lib/distutils/cygwinccompiler.py3
-rw-r--r--Lib/distutils/sysconfig.py151
-rw-r--r--Lib/distutils/tests/test_archive_util.py40
-rw-r--r--Lib/distutils/tests/test_bdist_rpm.py9
-rw-r--r--Lib/distutils/tests/test_sysconfig.py51
-rw-r--r--Lib/distutils/tests/test_util.py9
-rw-r--r--Lib/distutils/unixccompiler.py70
-rw-r--r--Lib/distutils/util.py96
-rw-r--r--Lib/doctest.py9
-rw-r--r--Lib/email/_encoded_words.py221
-rw-r--r--Lib/email/_header_value_parser.py2953
-rw-r--r--Lib/email/_parseaddr.py31
-rw-r--r--Lib/email/_policybase.py358
-rw-r--r--Lib/email/architecture.rst216
-rw-r--r--Lib/email/errors.py60
-rw-r--r--Lib/email/feedparser.py73
-rw-r--r--Lib/email/generator.py107
-rw-r--r--Lib/email/header.py45
-rw-r--r--Lib/email/headerregistry.py583
-rw-r--r--Lib/email/message.py86
-rw-r--r--Lib/email/mime/text.py14
-rw-r--r--Lib/email/parser.py43
-rw-r--r--Lib/email/policy.py188
-rw-r--r--Lib/email/utils.py147
-rw-r--r--Lib/encodings/cp037.py1
-rw-r--r--Lib/encodings/cp500.py1
-rw-r--r--Lib/encodings/cp65001.py40
-rw-r--r--Lib/encodings/hp_roman8.py376
-rw-r--r--Lib/encodings/idna.py22
-rw-r--r--Lib/encodings/iso8859_1.py1
-rw-r--r--Lib/encodings/mac_latin2.py409
-rw-r--r--Lib/encodings/palmos.py311
-rw-r--r--Lib/encodings/ptcp154.py399
-rw-r--r--Lib/fileinput.py3
-rw-r--r--Lib/fnmatch.py10
-rw-r--r--Lib/ftplib.py107
-rw-r--r--Lib/functools.py179
-rw-r--r--Lib/getopt.py19
-rw-r--r--Lib/getpass.py4
-rw-r--r--Lib/gettext.py2
-rw-r--r--Lib/gzip.py105
-rw-r--r--Lib/hmac.py4
-rw-r--r--Lib/html/entities.py2236
-rw-r--r--Lib/html/parser.py53
-rw-r--r--Lib/http/client.py175
-rw-r--r--Lib/http/cookiejar.py8
-rw-r--r--Lib/http/cookies.py2
-rw-r--r--Lib/http/server.py97
-rw-r--r--Lib/idlelib/AutoComplete.py3
-rw-r--r--Lib/idlelib/ColorDelegator.py13
-rw-r--r--Lib/idlelib/EditorWindow.py19
-rw-r--r--Lib/idlelib/IOBinding.py8
-rw-r--r--Lib/idlelib/NEWS.txt23
-rw-r--r--Lib/idlelib/PathBrowser.py7
-rw-r--r--Lib/idlelib/PyShell.py16
-rw-r--r--Lib/idlelib/ScriptBinding.py12
-rw-r--r--Lib/idlelib/__main__.py9
-rw-r--r--Lib/idlelib/configHandler.py3
-rw-r--r--Lib/idlelib/idlever.py2
-rw-r--r--Lib/idlelib/macosxSupport.py16
-rw-r--r--Lib/idlelib/rpc.py26
-rw-r--r--Lib/idlelib/run.py14
-rw-r--r--Lib/imaplib.py78
-rw-r--r--Lib/imp.py257
-rw-r--r--Lib/importlib/__init__.py144
-rw-r--r--Lib/importlib/_bootstrap.py1641
-rw-r--r--Lib/importlib/abc.py164
-rw-r--r--Lib/importlib/machinery.py15
-rw-r--r--Lib/importlib/test/__main__.py29
-rw-r--r--Lib/importlib/test/benchmark.py172
-rw-r--r--Lib/importlib/test/extension/test_loader.py59
-rw-r--r--Lib/importlib/test/import_/test_api.py22
-rw-r--r--Lib/importlib/test/import_/test_packages.py37
-rw-r--r--Lib/importlib/test/import_/test_path.py131
-rw-r--r--Lib/importlib/test/regrtest.py35
-rw-r--r--Lib/importlib/util.py16
-rw-r--r--Lib/inspect.py1011
-rw-r--r--Lib/io.py3
-rw-r--r--Lib/ipaddress.py2094
-rw-r--r--Lib/json/decoder.py3
-rw-r--r--Lib/lib2to3/__main__.py4
-rw-r--r--Lib/lib2to3/fixer_base.py4
-rw-r--r--Lib/lib2to3/pytree.py20
-rw-r--r--Lib/lib2to3/refactor.py4
-rw-r--r--Lib/lib2to3/tests/test_pytree.py17
-rw-r--r--Lib/logging/__init__.py164
-rw-r--r--Lib/logging/config.py18
-rw-r--r--Lib/logging/handlers.py152
-rw-r--r--Lib/lzma.py454
-rw-r--r--Lib/mailbox.py22
-rw-r--r--Lib/mailcap.py6
-rw-r--r--Lib/mimetypes.py3
-rw-r--r--Lib/modulefinder.py21
-rw-r--r--Lib/multiprocessing/__init__.py57
-rw-r--r--Lib/multiprocessing/connection.py659
-rw-r--r--Lib/multiprocessing/dummy/__init__.py17
-rw-r--r--Lib/multiprocessing/dummy/connection.py12
-rw-r--r--Lib/multiprocessing/forking.py189
-rw-r--r--Lib/multiprocessing/heap.py45
-rw-r--r--Lib/multiprocessing/managers.py171
-rw-r--r--Lib/multiprocessing/pool.py115
-rw-r--r--Lib/multiprocessing/process.py47
-rw-r--r--Lib/multiprocessing/queues.py46
-rw-r--r--Lib/multiprocessing/reduction.py407
-rw-r--r--Lib/multiprocessing/sharedctypes.py38
-rw-r--r--Lib/multiprocessing/synchronize.py94
-rw-r--r--Lib/multiprocessing/util.py71
-rw-r--r--Lib/nntplib.py20
-rw-r--r--Lib/numbers.py14
-rw-r--r--Lib/opcode.py9
-rw-r--r--Lib/optparse.py31
-rw-r--r--Lib/os.py239
-rwxr-xr-xLib/pdb.py95
-rw-r--r--Lib/pickle.py18
-rw-r--r--Lib/pickletools.py7
-rw-r--r--Lib/pipes.py23
-rw-r--r--Lib/pkgutil.py188
-rw-r--r--Lib/plat-linux/CDROM.py (renamed from Lib/plat-linux2/CDROM.py)0
-rw-r--r--Lib/plat-linux/DLFCN.py (renamed from Lib/plat-linux2/DLFCN.py)0
-rw-r--r--Lib/plat-linux/IN.py (renamed from Lib/plat-linux2/IN.py)0
-rw-r--r--Lib/plat-linux/TYPES.py (renamed from Lib/plat-linux2/TYPES.py)0
-rwxr-xr-xLib/plat-linux/regen (renamed from Lib/plat-linux2/regen)0
-rwxr-xr-xLib/platform.py135
-rw-r--r--Lib/plistlib.py40
-rw-r--r--Lib/poplib.py17
-rw-r--r--Lib/posixpath.py3
-rwxr-xr-xLib/profile.py36
-rw-r--r--Lib/pstats.py3
-rw-r--r--Lib/py_compile.py7
-rw-r--r--Lib/pyclbr.py25
-rwxr-xr-xLib/pydoc.py345
-rw-r--r--Lib/pydoc_data/topics.py49
-rw-r--r--Lib/queue.py140
-rw-r--r--Lib/random.py2
-rw-r--r--Lib/re.py15
-rw-r--r--Lib/runpy.py72
-rw-r--r--Lib/sched.py86
-rw-r--r--Lib/shlex.py20
-rw-r--r--Lib/shutil.py463
-rw-r--r--Lib/site.py110
-rwxr-xr-xLib/smtpd.py297
-rw-r--r--Lib/smtplib.py152
-rw-r--r--Lib/socket.py21
-rw-r--r--Lib/socketserver.py24
-rw-r--r--Lib/sqlite3/test/dbapi.py2
-rw-r--r--Lib/sqlite3/test/factory.py4
-rw-r--r--Lib/sqlite3/test/hooks.py54
-rw-r--r--Lib/sqlite3/test/regression.py2
-rw-r--r--Lib/sqlite3/test/transactions.py2
-rw-r--r--Lib/sqlite3/test/types.py4
-rw-r--r--Lib/sqlite3/test/userfunctions.py2
-rw-r--r--Lib/sre_compile.py4
-rw-r--r--Lib/sre_parse.py68
-rw-r--r--Lib/ssl.py98
-rw-r--r--Lib/stat.py129
-rw-r--r--Lib/string.py24
-rw-r--r--Lib/subprocess.py839
-rw-r--r--Lib/sysconfig.py407
-rwxr-xr-xLib/tabnanny.py3
-rw-r--r--Lib/tarfile.py399
-rw-r--r--Lib/tempfile.py56
-rw-r--r--Lib/test/buffer_tests.py8
-rw-r--r--Lib/test/crashers/README4
-rw-r--r--Lib/test/crashers/borrowed_ref_1.py29
-rw-r--r--Lib/test/crashers/borrowed_ref_2.py38
-rw-r--r--Lib/test/crashers/compiler_recursion.py12
-rw-r--r--Lib/test/crashers/loosing_mro_ref.py35
-rw-r--r--Lib/test/crashers/nasty_eq_vs_dict.py47
-rw-r--r--Lib/test/datetimetester.py128
-rw-r--r--Lib/test/decimaltestdata/extra.decTest13
-rw-r--r--Lib/test/dh512.pem9
-rw-r--r--Lib/test/exception_hierarchy.txt21
-rw-r--r--Lib/test/fork_wait.py10
-rw-r--r--Lib/test/future_test1.py (renamed from Lib/test/test_future1.py)0
-rw-r--r--Lib/test/future_test2.py (renamed from Lib/test/test_future2.py)0
-rw-r--r--Lib/test/json_tests/test_dump.py19
-rw-r--r--Lib/test/json_tests/test_scanstring.py11
-rw-r--r--Lib/test/keycert.passwd.pem33
-rw-r--r--Lib/test/list_tests.py41
-rw-r--r--Lib/test/lock_tests.py16
-rw-r--r--Lib/test/mailcap.txt39
-rw-r--r--Lib/test/math_testcases.txt114
-rw-r--r--Lib/test/memory_watchdog.py28
-rw-r--r--Lib/test/mock_socket.py3
-rw-r--r--Lib/test/multibytecodec_support.py (renamed from Lib/test/test_multibytecodec_support.py)17
-rw-r--r--Lib/test/namespace_pkgs/both_portions/foo/one.py1
-rw-r--r--Lib/test/namespace_pkgs/both_portions/foo/two.py1
-rw-r--r--Lib/test/namespace_pkgs/missing_directory.zipbin0 -> 515 bytes-rw-r--r--Lib/test/namespace_pkgs/module_and_namespace_package/a_test.py1
-rw-r--r--Lib/test/namespace_pkgs/module_and_namespace_package/a_test/empty (renamed from Lib/importlib/test/__init__.py)0
-rw-r--r--Lib/test/namespace_pkgs/nested_portion1.zipbin0 -> 556 bytes-rw-r--r--Lib/test/namespace_pkgs/not_a_namespace_pkg/foo/__init__.py (renamed from Lib/email/test/__init__.py)0
-rw-r--r--Lib/test/namespace_pkgs/not_a_namespace_pkg/foo/one.py1
-rw-r--r--Lib/test/namespace_pkgs/portion1/foo/one.py1
-rw-r--r--Lib/test/namespace_pkgs/portion2/foo/two.py1
-rw-r--r--Lib/test/namespace_pkgs/project1/parent/child/one.py1
-rw-r--r--Lib/test/namespace_pkgs/project2/parent/child/two.py1
-rw-r--r--Lib/test/namespace_pkgs/project3/parent/child/three.py1
-rw-r--r--Lib/test/namespace_pkgs/top_level_portion1.zipbin0 -> 332 bytes-rw-r--r--Lib/test/pickletester.py150
-rwxr-xr-xLib/test/regrtest.py281
-rw-r--r--Lib/test/reperf.py4
-rw-r--r--Lib/test/script_helper.py7
-rw-r--r--Lib/test/seq_tests.py7
-rw-r--r--Lib/test/sortperf.py4
-rw-r--r--Lib/test/ssl_key.passwd.pem18
-rw-r--r--Lib/test/ssl_servers.py16
-rw-r--r--Lib/test/string_tests.py85
-rw-r--r--Lib/test/support.py370
-rw-r--r--Lib/test/test__locale.py40
-rw-r--r--Lib/test/test__osx_support.py279
-rw-r--r--Lib/test/test_abc.py196
-rw-r--r--Lib/test/test_abstract_numbers.py2
-rw-r--r--Lib/test/test_aifc.py13
-rw-r--r--Lib/test/test_argparse.py173
-rwxr-xr-xLib/test/test_array.py58
-rw-r--r--Lib/test/test_ast.py442
-rw-r--r--Lib/test/test_asyncore.py197
-rw-r--r--Lib/test/test_base64.py165
-rw-r--r--Lib/test/test_bigmem.py223
-rw-r--r--Lib/test/test_binascii.py34
-rw-r--r--Lib/test/test_bool.py10
-rw-r--r--Lib/test/test_buffer.py4273
-rw-r--r--Lib/test/test_builtin.py157
-rw-r--r--Lib/test/test_bytes.py214
-rw-r--r--Lib/test/test_bz2.py572
-rw-r--r--Lib/test/test_calendar.py223
-rw-r--r--Lib/test/test_capi.py116
-rw-r--r--Lib/test/test_cgi.py19
-rw-r--r--Lib/test/test_cgitb.py55
-rw-r--r--Lib/test/test_cmd.py2
-rw-r--r--Lib/test/test_cmd_line.py16
-rw-r--r--Lib/test/test_cmd_line_script.py111
-rw-r--r--Lib/test/test_code.py2
-rw-r--r--Lib/test/test_code_module.py72
-rw-r--r--Lib/test/test_codeccallbacks.py161
-rw-r--r--Lib/test/test_codecencodings_cn.py39
-rw-r--r--Lib/test/test_codecencodings_hk.py10
-rw-r--r--Lib/test/test_codecencodings_iso2022.py14
-rw-r--r--Lib/test/test_codecencodings_jp.py118
-rw-r--r--Lib/test/test_codecencodings_kr.py39
-rw-r--r--Lib/test/test_codecencodings_tw.py10
-rw-r--r--Lib/test/test_codecmaps_cn.py8
-rw-r--r--Lib/test/test_codecmaps_hk.py4
-rw-r--r--Lib/test/test_codecmaps_jp.py12
-rw-r--r--Lib/test/test_codecmaps_kr.py8
-rw-r--r--Lib/test/test_codecmaps_tw.py9
-rw-r--r--Lib/test/test_codecs.py414
-rw-r--r--Lib/test/test_coding.py4
-rw-r--r--Lib/test/test_collections.py96
-rw-r--r--Lib/test/test_compile.py40
-rw-r--r--Lib/test/test_concurrent_futures.py62
-rw-r--r--Lib/test/test_configparser.py (renamed from Lib/test/test_cfgparser.py)37
-rw-r--r--Lib/test/test_contextlib.py225
-rw-r--r--Lib/test/test_copy.py187
-rw-r--r--Lib/test/test_cprofile.py23
-rw-r--r--Lib/test/test_crashers.py38
-rw-r--r--Lib/test/test_crypt.py19
-rw-r--r--Lib/test/test_curses.py50
-rw-r--r--Lib/test/test_dbm.py4
-rw-r--r--Lib/test/test_decimal.py3443
-rw-r--r--Lib/test/test_deque.py15
-rw-r--r--Lib/test/test_descr.py198
-rw-r--r--Lib/test/test_descrtut.py3
-rw-r--r--Lib/test/test_devpoll.py94
-rw-r--r--Lib/test/test_dict.py120
-rw-r--r--Lib/test/test_dis.py111
-rw-r--r--Lib/test/test_doctest.py446
-rw-r--r--Lib/test/test_dummy_thread.py4
-rw-r--r--Lib/test/test_email.py14
-rw-r--r--Lib/test/test_email/__init__.py150
-rw-r--r--Lib/test/test_email/__main__.py3
-rw-r--r--Lib/test/test_email/data/PyBanner048.gif (renamed from Lib/email/test/data/PyBanner048.gif)bin954 -> 954 bytes-rw-r--r--Lib/test/test_email/data/audiotest.au (renamed from Lib/email/test/data/audiotest.au)bin28144 -> 28144 bytes-rw-r--r--Lib/test/test_email/data/msg_01.txt (renamed from Lib/email/test/data/msg_01.txt)0
-rw-r--r--Lib/test/test_email/data/msg_02.txt (renamed from Lib/email/test/data/msg_02.txt)0
-rw-r--r--Lib/test/test_email/data/msg_03.txt (renamed from Lib/email/test/data/msg_03.txt)0
-rw-r--r--Lib/test/test_email/data/msg_04.txt (renamed from Lib/email/test/data/msg_04.txt)0
-rw-r--r--Lib/test/test_email/data/msg_05.txt (renamed from Lib/email/test/data/msg_05.txt)0
-rw-r--r--Lib/test/test_email/data/msg_06.txt (renamed from Lib/email/test/data/msg_06.txt)0
-rw-r--r--Lib/test/test_email/data/msg_07.txt (renamed from Lib/email/test/data/msg_07.txt)0
-rw-r--r--Lib/test/test_email/data/msg_08.txt (renamed from Lib/email/test/data/msg_08.txt)0
-rw-r--r--Lib/test/test_email/data/msg_09.txt (renamed from Lib/email/test/data/msg_09.txt)0
-rw-r--r--Lib/test/test_email/data/msg_10.txt (renamed from Lib/email/test/data/msg_10.txt)0
-rw-r--r--Lib/test/test_email/data/msg_11.txt (renamed from Lib/email/test/data/msg_11.txt)0
-rw-r--r--Lib/test/test_email/data/msg_12.txt (renamed from Lib/email/test/data/msg_12.txt)0
-rw-r--r--Lib/test/test_email/data/msg_12a.txt (renamed from Lib/email/test/data/msg_12a.txt)0
-rw-r--r--Lib/test/test_email/data/msg_13.txt (renamed from Lib/email/test/data/msg_13.txt)0
-rw-r--r--Lib/test/test_email/data/msg_14.txt (renamed from Lib/email/test/data/msg_14.txt)0
-rw-r--r--Lib/test/test_email/data/msg_15.txt (renamed from Lib/email/test/data/msg_15.txt)0
-rw-r--r--Lib/test/test_email/data/msg_16.txt (renamed from Lib/email/test/data/msg_16.txt)0
-rw-r--r--Lib/test/test_email/data/msg_17.txt (renamed from Lib/email/test/data/msg_17.txt)0
-rw-r--r--Lib/test/test_email/data/msg_18.txt (renamed from Lib/email/test/data/msg_18.txt)0
-rw-r--r--Lib/test/test_email/data/msg_19.txt (renamed from Lib/email/test/data/msg_19.txt)0
-rw-r--r--Lib/test/test_email/data/msg_20.txt (renamed from Lib/email/test/data/msg_20.txt)0
-rw-r--r--Lib/test/test_email/data/msg_21.txt (renamed from Lib/email/test/data/msg_21.txt)0
-rw-r--r--Lib/test/test_email/data/msg_22.txt (renamed from Lib/email/test/data/msg_22.txt)0
-rw-r--r--Lib/test/test_email/data/msg_23.txt (renamed from Lib/email/test/data/msg_23.txt)0
-rw-r--r--Lib/test/test_email/data/msg_24.txt (renamed from Lib/email/test/data/msg_24.txt)0
-rw-r--r--Lib/test/test_email/data/msg_25.txt (renamed from Lib/email/test/data/msg_25.txt)0
-rw-r--r--Lib/test/test_email/data/msg_26.txt (renamed from Lib/email/test/data/msg_26.txt)0
-rw-r--r--Lib/test/test_email/data/msg_27.txt (renamed from Lib/email/test/data/msg_27.txt)0
-rw-r--r--Lib/test/test_email/data/msg_28.txt (renamed from Lib/email/test/data/msg_28.txt)0
-rw-r--r--Lib/test/test_email/data/msg_29.txt (renamed from Lib/email/test/data/msg_29.txt)0
-rw-r--r--Lib/test/test_email/data/msg_30.txt (renamed from Lib/email/test/data/msg_30.txt)0
-rw-r--r--Lib/test/test_email/data/msg_31.txt (renamed from Lib/email/test/data/msg_31.txt)0
-rw-r--r--Lib/test/test_email/data/msg_32.txt (renamed from Lib/email/test/data/msg_32.txt)0
-rw-r--r--Lib/test/test_email/data/msg_33.txt (renamed from Lib/email/test/data/msg_33.txt)0
-rw-r--r--Lib/test/test_email/data/msg_34.txt (renamed from Lib/email/test/data/msg_34.txt)0
-rw-r--r--Lib/test/test_email/data/msg_35.txt (renamed from Lib/email/test/data/msg_35.txt)0
-rw-r--r--Lib/test/test_email/data/msg_36.txt (renamed from Lib/email/test/data/msg_36.txt)0
-rw-r--r--Lib/test/test_email/data/msg_37.txt (renamed from Lib/email/test/data/msg_37.txt)0
-rw-r--r--Lib/test/test_email/data/msg_38.txt (renamed from Lib/email/test/data/msg_38.txt)0
-rw-r--r--Lib/test/test_email/data/msg_39.txt (renamed from Lib/email/test/data/msg_39.txt)0
-rw-r--r--Lib/test/test_email/data/msg_40.txt (renamed from Lib/email/test/data/msg_40.txt)0
-rw-r--r--Lib/test/test_email/data/msg_41.txt (renamed from Lib/email/test/data/msg_41.txt)0
-rw-r--r--Lib/test/test_email/data/msg_42.txt (renamed from Lib/email/test/data/msg_42.txt)0
-rw-r--r--Lib/test/test_email/data/msg_43.txt (renamed from Lib/email/test/data/msg_43.txt)0
-rw-r--r--Lib/test/test_email/data/msg_44.txt (renamed from Lib/email/test/data/msg_44.txt)0
-rw-r--r--Lib/test/test_email/data/msg_45.txt (renamed from Lib/email/test/data/msg_45.txt)0
-rw-r--r--Lib/test/test_email/data/msg_46.txt (renamed from Lib/email/test/data/msg_46.txt)0
-rw-r--r--Lib/test/test_email/test__encoded_words.py187
-rw-r--r--Lib/test/test_email/test__header_value_parser.py2552
-rw-r--r--Lib/test/test_email/test_asian_codecs.py (renamed from Lib/email/test/test_email_codecs.py)17
-rw-r--r--Lib/test/test_email/test_defect_handling.py320
-rw-r--r--Lib/test/test_email/test_email.py (renamed from Lib/email/test/test_email.py)403
-rw-r--r--Lib/test/test_email/test_generator.py199
-rw-r--r--Lib/test/test_email/test_headerregistry.py1515
-rw-r--r--Lib/test/test_email/test_message.py18
-rw-r--r--Lib/test/test_email/test_parser.py36
-rw-r--r--Lib/test/test_email/test_pickleable.py74
-rw-r--r--Lib/test/test_email/test_policy.py322
-rw-r--r--Lib/test/test_email/test_utils.py136
-rw-r--r--Lib/test/test_email/torture_test.py (renamed from Lib/email/test/test_email_torture.py)0
-rw-r--r--Lib/test/test_enumerate.py30
-rw-r--r--Lib/test/test_epoll.py3
-rw-r--r--Lib/test/test_exceptions.py123
-rw-r--r--Lib/test/test_extcall.py87
-rw-r--r--Lib/test/test_faulthandler.py593
-rw-r--r--Lib/test/test_fileinput.py630
-rw-r--r--Lib/test/test_fileio.py5
-rw-r--r--Lib/test/test_float.py21
-rw-r--r--Lib/test/test_format.py46
-rw-r--r--Lib/test/test_fractions.py12
-rw-r--r--Lib/test/test_frozen.py26
-rw-r--r--Lib/test/test_ftplib.py180
-rw-r--r--Lib/test/test_funcattrs.py55
-rw-r--r--Lib/test/test_functools.py93
-rw-r--r--Lib/test/test_future.py12
-rw-r--r--Lib/test/test_gc.py143
-rw-r--r--Lib/test/test_gdb.py139
-rw-r--r--Lib/test/test_generators.py42
-rw-r--r--Lib/test/test_genericpath.py49
-rw-r--r--Lib/test/test_genexps.py8
-rw-r--r--Lib/test/test_getargs2.py114
-rw-r--r--Lib/test/test_glob.py6
-rw-r--r--Lib/test/test_grammar.py123
-rw-r--r--Lib/test/test_gzip.py137
-rw-r--r--Lib/test/test_hash.py14
-rw-r--r--Lib/test/test_hashlib.py4
-rw-r--r--Lib/test/test_hmac.py123
-rw-r--r--Lib/test/test_htmlparser.py13
-rw-r--r--Lib/test/test_http_cookiejar.py11
-rw-r--r--Lib/test/test_http_cookies.py9
-rw-r--r--Lib/test/test_httplib.py140
-rw-r--r--Lib/test/test_httpservers.py73
-rw-r--r--Lib/test/test_imaplib.py89
-rw-r--r--Lib/test/test_imp.py172
-rw-r--r--Lib/test/test_import.py528
-rw-r--r--Lib/test/test_importhooks.py13
-rw-r--r--Lib/test/test_importlib.py5
-rw-r--r--Lib/test/test_importlib/__init__.py33
-rw-r--r--Lib/test/test_importlib/__main__.py20
-rw-r--r--Lib/test/test_importlib/abc.py (renamed from Lib/importlib/test/abc.py)0
-rw-r--r--Lib/test/test_importlib/builtin/__init__.py (renamed from Lib/importlib/test/builtin/__init__.py)4
-rw-r--r--Lib/test/test_importlib/builtin/test_finder.py (renamed from Lib/importlib/test/builtin/test_finder.py)4
-rw-r--r--Lib/test/test_importlib/builtin/test_loader.py (renamed from Lib/importlib/test/builtin/test_loader.py)19
-rw-r--r--Lib/test/test_importlib/builtin/util.py (renamed from Lib/importlib/test/builtin/util.py)0
-rw-r--r--Lib/test/test_importlib/extension/__init__.py (renamed from Lib/importlib/test/frozen/__init__.py)4
-rw-r--r--Lib/test/test_importlib/extension/test_case_sensitivity.py (renamed from Lib/importlib/test/extension/test_case_sensitivity.py)12
-rw-r--r--Lib/test/test_importlib/extension/test_finder.py (renamed from Lib/importlib/test/extension/test_finder.py)15
-rw-r--r--Lib/test/test_importlib/extension/test_loader.py79
-rw-r--r--Lib/test/test_importlib/extension/test_path_hook.py (renamed from Lib/importlib/test/extension/test_path_hook.py)5
-rw-r--r--Lib/test/test_importlib/extension/util.py (renamed from Lib/importlib/test/extension/util.py)5
-rw-r--r--Lib/test/test_importlib/frozen/__init__.py (renamed from Lib/importlib/test/source/__init__.py)4
-rw-r--r--Lib/test/test_importlib/frozen/test_finder.py (renamed from Lib/importlib/test/frozen/test_finder.py)4
-rw-r--r--Lib/test/test_importlib/frozen/test_loader.py (renamed from Lib/importlib/test/frozen/test_loader.py)40
-rw-r--r--Lib/test/test_importlib/import_/__init__.py (renamed from Lib/importlib/test/import_/__init__.py)4
-rw-r--r--Lib/test/test_importlib/import_/test___package__.py (renamed from Lib/importlib/test/import_/test___package__.py)2
-rw-r--r--Lib/test/test_importlib/import_/test_api.py67
-rw-r--r--Lib/test/test_importlib/import_/test_caching.py (renamed from Lib/importlib/test/import_/test_caching.py)6
-rw-r--r--Lib/test/test_importlib/import_/test_fromlist.py (renamed from Lib/importlib/test/import_/test_fromlist.py)28
-rw-r--r--Lib/test/test_importlib/import_/test_meta_path.py (renamed from Lib/importlib/test/import_/test_meta_path.py)22
-rw-r--r--Lib/test/test_importlib/import_/test_packages.py112
-rw-r--r--Lib/test/test_importlib/import_/test_path.py98
-rw-r--r--Lib/test/test_importlib/import_/test_relative_imports.py (renamed from Lib/importlib/test/import_/test_relative_imports.py)14
-rw-r--r--Lib/test/test_importlib/import_/util.py (renamed from Lib/importlib/test/import_/util.py)1
-rw-r--r--Lib/test/test_importlib/regrtest.py17
-rw-r--r--Lib/test/test_importlib/source/__init__.py (renamed from Lib/importlib/test/extension/__init__.py)4
-rw-r--r--Lib/test/test_importlib/source/test_abc_loader.py (renamed from Lib/importlib/test/source/test_abc_loader.py)90
-rw-r--r--Lib/test/test_importlib/source/test_case_sensitivity.py (renamed from Lib/importlib/test/source/test_case_sensitivity.py)16
-rw-r--r--Lib/test/test_importlib/source/test_file_loader.py (renamed from Lib/importlib/test/source/test_file_loader.py)131
-rw-r--r--Lib/test/test_importlib/source/test_finder.py (renamed from Lib/importlib/test/source/test_finder.py)61
-rw-r--r--Lib/test/test_importlib/source/test_path_hook.py (renamed from Lib/importlib/test/source/test_path_hook.py)12
-rw-r--r--Lib/test/test_importlib/source/test_source_encoding.py (renamed from Lib/importlib/test/source/test_source_encoding.py)8
-rw-r--r--Lib/test/test_importlib/source/util.py (renamed from Lib/importlib/test/source/util.py)0
-rw-r--r--Lib/test/test_importlib/test_abc.py (renamed from Lib/importlib/test/test_abc.py)20
-rw-r--r--Lib/test/test_importlib/test_api.py (renamed from Lib/importlib/test/test_api.py)96
-rw-r--r--Lib/test/test_importlib/test_locks.py129
-rw-r--r--Lib/test/test_importlib/test_util.py (renamed from Lib/importlib/test/test_util.py)100
-rw-r--r--Lib/test/test_importlib/util.py (renamed from Lib/importlib/test/util.py)8
-rw-r--r--Lib/test/test_inspect.py1133
-rw-r--r--Lib/test/test_int.py12
-rw-r--r--Lib/test/test_io.py118
-rw-r--r--Lib/test/test_ipaddress.py1649
-rw-r--r--Lib/test/test_iter.py43
-rw-r--r--Lib/test/test_itertools.py415
-rw-r--r--Lib/test/test_keywordonlyarg.py2
-rw-r--r--Lib/test/test_lib2to3.py4
-rw-r--r--Lib/test/test_list.py28
-rw-r--r--Lib/test/test_locale.py4
-rw-r--r--Lib/test/test_logging.py1787
-rw-r--r--Lib/test/test_long.py80
-rw-r--r--Lib/test/test_lzma.py1517
-rw-r--r--Lib/test/test_mailbox.py56
-rw-r--r--Lib/test/test_mailcap.py221
-rw-r--r--Lib/test/test_marshal.py17
-rw-r--r--Lib/test/test_math.py43
-rw-r--r--Lib/test/test_memoryio.py2
-rw-r--r--Lib/test/test_memoryview.py93
-rw-r--r--Lib/test/test_metaclass.py16
-rw-r--r--Lib/test/test_minidom.py77
-rw-r--r--Lib/test/test_mmap.py32
-rw-r--r--Lib/test/test_module.py99
-rw-r--r--Lib/test/test_modulefinder.py48
-rw-r--r--Lib/test/test_multibytecodec.py16
-rw-r--r--Lib/test/test_multiprocessing.py1024
-rw-r--r--Lib/test/test_mutants.py291
-rw-r--r--Lib/test/test_namespace_pkgs.py294
-rw-r--r--Lib/test/test_nntplib.py27
-rw-r--r--Lib/test/test_ntpath.py7
-rw-r--r--Lib/test/test_numeric_tower.py2
-rw-r--r--Lib/test/test_optparse.py20
-rw-r--r--Lib/test/test_os.py840
-rw-r--r--Lib/test/test_ossaudiodev.py16
-rw-r--r--Lib/test/test_osx_env.py3
-rw-r--r--Lib/test/test_parser.py11
-rw-r--r--Lib/test/test_pdb.py3
-rw-r--r--Lib/test/test_peepholer.py47
-rw-r--r--Lib/test/test_pep277.py66
-rw-r--r--Lib/test/test_pep292.py33
-rw-r--r--Lib/test/test_pep3131.py7
-rw-r--r--Lib/test/test_pep3151.py211
-rw-r--r--Lib/test/test_pep380.py965
-rw-r--r--Lib/test/test_pickle.py28
-rw-r--r--Lib/test/test_pipes.py15
-rw-r--r--Lib/test/test_pkg.py33
-rw-r--r--Lib/test/test_pkgimport.py4
-rw-r--r--Lib/test/test_pkgutil.py102
-rw-r--r--Lib/test/test_platform.py51
-rw-r--r--Lib/test/test_plistlib.py6
-rw-r--r--Lib/test/test_poplib.py15
-rw-r--r--Lib/test/test_posix.py615
-rw-r--r--Lib/test/test_posixpath.py12
-rw-r--r--Lib/test/test_pprint.py4
-rw-r--r--Lib/test/test_print.py26
-rw-r--r--Lib/test/test_property.py23
-rw-r--r--Lib/test/test_pty.py19
-rw-r--r--Lib/test/test_pulldom.py347
-rw-r--r--Lib/test/test_pydoc.py21
-rw-r--r--Lib/test/test_raise.py39
-rw-r--r--Lib/test/test_random.py6
-rw-r--r--Lib/test/test_range.py105
-rw-r--r--Lib/test/test_re.py129
-rw-r--r--Lib/test/test_reprlib.py82
-rw-r--r--Lib/test/test_richcmp.py1
-rw-r--r--Lib/test/test_runpy.py58
-rw-r--r--Lib/test/test_sax.py4
-rw-r--r--Lib/test/test_sched.py36
-rw-r--r--Lib/test/test_scope.py19
-rw-r--r--Lib/test/test_select.py22
-rw-r--r--Lib/test/test_set.py21
-rw-r--r--Lib/test/test_shelve.py6
-rw-r--r--Lib/test/test_shlex.py21
-rw-r--r--Lib/test/test_shutil.py942
-rw-r--r--Lib/test/test_signal.py469
-rw-r--r--Lib/test/test_site.py5
-rw-r--r--Lib/test/test_smtpd.py368
-rw-r--r--Lib/test/test_smtplib.py80
-rw-r--r--Lib/test/test_smtpnet.py42
-rw-r--r--Lib/test/test_socket.py2792
-rw-r--r--Lib/test/test_socketserver.py2
-rw-r--r--Lib/test/test_ssl.py393
-rw-r--r--Lib/test/test_stat.py66
-rw-r--r--Lib/test/test_string.py70
-rw-r--r--Lib/test/test_strlit.py32
-rw-r--r--Lib/test/test_struct.py71
-rw-r--r--Lib/test/test_structseq.py5
-rw-r--r--Lib/test/test_subprocess.py274
-rw-r--r--Lib/test/test_sundry.py1
-rw-r--r--Lib/test/test_super.py49
-rw-r--r--Lib/test/test_support.py199
-rw-r--r--Lib/test/test_sys.py179
-rw-r--r--Lib/test/test_sys_settrace.py10
-rw-r--r--Lib/test/test_sysconfig.py136
-rw-r--r--Lib/test/test_tarfile.py160
-rw-r--r--Lib/test/test_telnetlib.py1
-rw-r--r--Lib/test/test_tempfile.py263
-rw-r--r--Lib/test/test_textwrap.py148
-rw-r--r--Lib/test/test_threaded_import.py50
-rw-r--r--Lib/test/test_threading.py19
-rw-r--r--Lib/test/test_threadsignals.py6
-rw-r--r--Lib/test/test_time.py486
-rw-r--r--Lib/test/test_tokenize.py176
-rw-r--r--Lib/test/test_tools.py37
-rw-r--r--Lib/test/test_trace.py17
-rw-r--r--Lib/test/test_traceback.py15
-rw-r--r--Lib/test/test_tuple.py29
-rw-r--r--Lib/test/test_types.py583
-rw-r--r--Lib/test/test_ucn.py88
-rw-r--r--Lib/test/test_unicode.py334
-rw-r--r--Lib/test/test_unicode_file.py17
-rw-r--r--Lib/test/test_unicodedata.py16
-rw-r--r--Lib/test/test_urllib.py63
-rw-r--r--Lib/test/test_urllib2.py129
-rw-r--r--Lib/test/test_urllib2_localnet.py7
-rw-r--r--Lib/test/test_urllib2net.py13
-rw-r--r--Lib/test/test_userlist.py6
-rwxr-xr-xLib/test/test_userstring.py4
-rw-r--r--Lib/test/test_uuid.py4
-rw-r--r--Lib/test/test_venv.py203
-rw-r--r--Lib/test/test_wait3.py7
-rw-r--r--Lib/test/test_warnings.py14
-rw-r--r--Lib/test/test_webbrowser.py192
-rw-r--r--Lib/test/test_winsound.py8
-rw-r--r--Lib/test/test_wsgiref.py10
-rw-r--r--Lib/test/test_xml_etree.py1014
-rw-r--r--Lib/test/test_xml_etree_c.py104
-rw-r--r--Lib/test/test_xmlrpc.py119
-rw-r--r--Lib/test/test_xmlrpc_net.py4
-rw-r--r--Lib/test/test_zipfile.py342
-rw-r--r--Lib/test/test_zipfile64.py20
-rw-r--r--Lib/test/test_zipimport.py24
-rw-r--r--Lib/test/test_zipimport_support.py5
-rw-r--r--Lib/test/test_zlib.py72
-rw-r--r--Lib/test/threaded_import_hangers.py13
-rw-r--r--Lib/test/tokenize_tests.txt8
-rw-r--r--Lib/textwrap.py36
-rw-r--r--Lib/threading.py221
-rw-r--r--Lib/timeit.py30
-rw-r--r--Lib/tkinter/__init__.py155
-rw-r--r--Lib/tkinter/_fix.py4
-rw-r--r--Lib/tkinter/filedialog.py2
-rw-r--r--Lib/tkinter/font.py63
-rw-r--r--Lib/tkinter/test/test_tkinter/test_variables.py165
-rwxr-xr-xLib/token.py2
-rw-r--r--Lib/tokenize.py225
-rw-r--r--Lib/trace.py24
-rw-r--r--Lib/traceback.py13
-rw-r--r--Lib/turtle.py1
-rw-r--r--Lib/types.py60
-rw-r--r--Lib/unittest/__main__.py9
-rw-r--r--Lib/unittest/case.py133
-rw-r--r--Lib/unittest/main.py77
-rw-r--r--Lib/unittest/mock.py2211
-rw-r--r--Lib/unittest/result.py1
-rw-r--r--Lib/unittest/test/__init__.py1
-rw-r--r--Lib/unittest/test/_test_warnings.py1
-rw-r--r--Lib/unittest/test/test_assertions.py73
-rw-r--r--Lib/unittest/test/test_case.py63
-rw-r--r--Lib/unittest/test/test_loader.py4
-rw-r--r--Lib/unittest/test/test_program.py17
-rw-r--r--Lib/unittest/test/testmock/__init__.py17
-rw-r--r--Lib/unittest/test/testmock/support.py23
-rw-r--r--Lib/unittest/test/testmock/testcallable.py147
-rw-r--r--Lib/unittest/test/testmock/testhelpers.py889
-rw-r--r--Lib/unittest/test/testmock/testmagicmethods.py403
-rw-r--r--Lib/unittest/test/testmock/testmock.py1275
-rw-r--r--Lib/unittest/test/testmock/testpatch.py1785
-rw-r--r--Lib/unittest/test/testmock/testsentinel.py28
-rw-r--r--Lib/unittest/test/testmock/testwith.py176
-rw-r--r--Lib/urllib/error.py3
-rw-r--r--Lib/urllib/request.py213
-rw-r--r--Lib/urllib/response.py7
-rw-r--r--Lib/uuid.py2
-rw-r--r--Lib/venv/__init__.py398
-rw-r--r--Lib/venv/__main__.py10
-rw-r--r--Lib/venv/scripts/nt/Activate.ps134
-rw-r--r--Lib/venv/scripts/nt/Deactivate.ps119
-rw-r--r--Lib/venv/scripts/nt/activate.bat31
-rw-r--r--Lib/venv/scripts/nt/deactivate.bat17
-rw-r--r--Lib/venv/scripts/nt/pydoc.py4
-rw-r--r--Lib/venv/scripts/posix/activate76
-rwxr-xr-xLib/venv/scripts/posix/pydoc5
-rw-r--r--Lib/webbrowser.py46
-rw-r--r--Lib/wsgiref.egg-info8
-rw-r--r--Lib/wsgiref/simple_server.py3
-rw-r--r--Lib/xdrlib.py6
-rw-r--r--Lib/xml/dom/__init__.py1
-rw-r--r--Lib/xml/dom/domreg.py2
-rw-r--r--Lib/xml/dom/expatbuilder.py42
-rw-r--r--Lib/xml/dom/minidom.py293
-rw-r--r--Lib/xml/dom/pulldom.py6
-rw-r--r--Lib/xml/etree/ElementTree.py380
-rw-r--r--Lib/xml/etree/cElementTree.py4
-rw-r--r--Lib/xml/parsers/expat.py2
-rw-r--r--Lib/xmlrpc/client.py132
-rw-r--r--Lib/xmlrpc/server.py34
-rw-r--r--Lib/zipfile.py386
-rw-r--r--Mac/BuildScript/README.txt151
-rwxr-xr-xMac/BuildScript/build-installer.py500
-rw-r--r--Mac/BuildScript/ncurses-5.5.patch36
-rw-r--r--Mac/BuildScript/resources/ReadMe.txt28
-rw-r--r--Mac/BuildScript/resources/Welcome.rtf8
-rwxr-xr-xMac/BuildScript/scripts/postflight.documentation9
-rwxr-xr-xMac/BuildScript/scripts/postflight.framework14
-rw-r--r--Mac/Makefile.in19
-rw-r--r--Mac/README206
-rw-r--r--Mac/Tools/fixapplepython23.py131
-rw-r--r--Mac/Tools/pythonw.c48
-rw-r--r--Makefile.pre.in441
-rw-r--r--Misc/ACKS179
-rw-r--r--Misc/HISTORY5397
-rw-r--r--Misc/NEWS8810
-rw-r--r--Misc/README4
-rw-r--r--Misc/RPM/python-3.3.spec (renamed from Misc/RPM/python-3.2.spec)4
-rw-r--r--Misc/python-config.in5
-rw-r--r--Misc/python.man52
-rw-r--r--Misc/svnmap.txt72546
-rw-r--r--Misc/valgrind-python.supp80
-rw-r--r--Modules/Setup.dist12
-rw-r--r--Modules/_bisectmodule.c7
-rw-r--r--Modules/_bz2module.c590
-rw-r--r--Modules/_codecsmodule.c259
-rw-r--r--Modules/_collectionsmodule.c119
-rw-r--r--Modules/_cryptmodule.c (renamed from Modules/cryptmodule.c)4
-rw-r--r--Modules/_csv.c255
-rw-r--r--Modules/_ctypes/_ctypes.c46
-rw-r--r--Modules/_ctypes/callproc.c18
-rw-r--r--Modules/_ctypes/cfield.c84
-rw-r--r--Modules/_ctypes/libffi.diff149
-rw-r--r--Modules/_ctypes/libffi/ChangeLog915
-rw-r--r--Modules/_ctypes/libffi/ChangeLog.libffi33
-rw-r--r--Modules/_ctypes/libffi/LICENSE6
-rw-r--r--Modules/_ctypes/libffi/Makefile.am71
-rw-r--r--Modules/_ctypes/libffi/Makefile.in380
-rw-r--r--Modules/_ctypes/libffi/README59
-rw-r--r--Modules/_ctypes/libffi/aclocal.m4906
-rwxr-xr-xModules/_ctypes/libffi/compile21
-rwxr-xr-xModules/_ctypes/libffi/config.guess225
-rwxr-xr-xModules/_ctypes/libffi/config.sub181
-rwxr-xr-xModules/_ctypes/libffi/configure4955
-rw-r--r--Modules/_ctypes/libffi/configure.ac244
-rwxr-xr-xModules/_ctypes/libffi/depcomp116
-rw-r--r--Modules/_ctypes/libffi/doc/libffi.info76
-rw-r--r--Modules/_ctypes/libffi/doc/libffi.texi40
-rw-r--r--Modules/_ctypes/libffi/doc/stamp-vti8
-rw-r--r--Modules/_ctypes/libffi/doc/version.texi8
-rw-r--r--Modules/_ctypes/libffi/fficonfig.h.in18
-rw-r--r--Modules/_ctypes/libffi/generate-ios-source-and-headers.py160
-rw-r--r--Modules/_ctypes/libffi/generate-osx-source-and-headers.py153
-rw-r--r--Modules/_ctypes/libffi/include/Makefile.in40
-rw-r--r--Modules/_ctypes/libffi/include/ffi.h.in125
-rw-r--r--Modules/_ctypes/libffi/include/ffi_common.h16
-rwxr-xr-xModules/_ctypes/libffi/install-sh531
-rw-r--r--Modules/_ctypes/libffi/libffi.xcodeproj/project.pbxproj595
-rw-r--r--Modules/_ctypes/libffi/libtool-version2
-rw-r--r--[-rwxr-xr-x]Modules/_ctypes/libffi/ltmain.sh4039
-rw-r--r--Modules/_ctypes/libffi/m4/asmcfi.m413
-rw-r--r--Modules/_ctypes/libffi/m4/ax_append_flag.m469
-rw-r--r--Modules/_ctypes/libffi/m4/ax_cc_maxopt.m4180
-rw-r--r--Modules/_ctypes/libffi/m4/ax_cflags_warn_all.m4121
-rw-r--r--Modules/_ctypes/libffi/m4/ax_check_compile_flag.m472
-rw-r--r--Modules/_ctypes/libffi/m4/ax_compiler_vendor.m484
-rw-r--r--Modules/_ctypes/libffi/m4/ax_configure_args.m470
-rw-r--r--Modules/_ctypes/libffi/m4/ax_enable_builddir.m4300
-rw-r--r--Modules/_ctypes/libffi/m4/ax_gcc_archflag.m4215
-rw-r--r--Modules/_ctypes/libffi/m4/ax_gcc_x86_cpuid.m479
-rw-r--r--Modules/_ctypes/libffi/m4/libtool.m42273
-rw-r--r--Modules/_ctypes/libffi/m4/ltoptions.m432
-rw-r--r--Modules/_ctypes/libffi/m4/ltversion.m412
-rw-r--r--Modules/_ctypes/libffi/m4/lt~obsolete.m412
-rw-r--r--Modules/_ctypes/libffi/man/Makefile.am4
-rw-r--r--Modules/_ctypes/libffi/man/Makefile.in40
-rw-r--r--Modules/_ctypes/libffi/man/ffi.310
-rw-r--r--Modules/_ctypes/libffi/man/ffi_prep_cif.38
-rw-r--r--Modules/_ctypes/libffi/man/ffi_prep_cif_var.373
-rwxr-xr-xModules/_ctypes/libffi/missing104
-rwxr-xr-x[-rw-r--r--]Modules/_ctypes/libffi/msvcc.sh30
-rw-r--r--Modules/_ctypes/libffi/src/alpha/ffi.c6
-rw-r--r--Modules/_ctypes/libffi/src/alpha/ffitarget.h7
-rw-r--r--Modules/_ctypes/libffi/src/alpha/osf.S57
-rw-r--r--Modules/_ctypes/libffi/src/arm/ffi.c507
-rw-r--r--Modules/_ctypes/libffi/src/arm/ffitarget.h28
-rw-r--r--Modules/_ctypes/libffi/src/arm/gentramp.sh118
-rw-r--r--Modules/_ctypes/libffi/src/arm/sysv.S226
-rw-r--r--Modules/_ctypes/libffi/src/arm/trampoline.S4450
-rw-r--r--Modules/_ctypes/libffi/src/avr32/ffi.c6
-rw-r--r--Modules/_ctypes/libffi/src/avr32/ffitarget.h11
-rw-r--r--Modules/_ctypes/libffi/src/closures.c29
-rw-r--r--Modules/_ctypes/libffi/src/cris/ffi.c15
-rw-r--r--Modules/_ctypes/libffi/src/cris/ffitarget.h11
-rw-r--r--Modules/_ctypes/libffi/src/dlmalloc.c70
-rw-r--r--Modules/_ctypes/libffi/src/frv/ffitarget.h15
-rw-r--r--Modules/_ctypes/libffi/src/ia64/ffi.c16
-rw-r--r--Modules/_ctypes/libffi/src/ia64/ffitarget.h11
-rw-r--r--Modules/_ctypes/libffi/src/java_raw_api.c2
-rw-r--r--Modules/_ctypes/libffi/src/m32r/ffitarget.h11
-rw-r--r--Modules/_ctypes/libffi/src/m68k/ffi.c96
-rw-r--r--Modules/_ctypes/libffi/src/m68k/ffitarget.h11
-rw-r--r--Modules/_ctypes/libffi/src/m68k/sysv.S89
-rw-r--r--Modules/_ctypes/libffi/src/mips/ffi.c17
-rw-r--r--Modules/_ctypes/libffi/src/mips/ffitarget.h36
-rw-r--r--Modules/_ctypes/libffi/src/mips/n32.S1
-rw-r--r--Modules/_ctypes/libffi/src/moxie/ffitarget.h56
-rw-r--r--Modules/_ctypes/libffi/src/pa/ffi.c11
-rw-r--r--Modules/_ctypes/libffi/src/pa/ffitarget.h18
-rw-r--r--Modules/_ctypes/libffi/src/powerpc/aix.S6
-rw-r--r--Modules/_ctypes/libffi/src/powerpc/aix_closure.S6
-rw-r--r--Modules/_ctypes/libffi/src/powerpc/asm.h2
-rw-r--r--Modules/_ctypes/libffi/src/powerpc/darwin.S292
-rw-r--r--Modules/_ctypes/libffi/src/powerpc/darwin_closure.S459
-rw-r--r--Modules/_ctypes/libffi/src/powerpc/ffi.c639
-rw-r--r--Modules/_ctypes/libffi/src/powerpc/ffi_darwin.c779
-rw-r--r--Modules/_ctypes/libffi/src/powerpc/ffitarget.h43
-rw-r--r--Modules/_ctypes/libffi/src/powerpc/ppc_closure.S19
-rw-r--r--Modules/_ctypes/libffi/src/powerpc/sysv.S27
-rw-r--r--Modules/_ctypes/libffi/src/prep_cif.c69
-rw-r--r--Modules/_ctypes/libffi/src/s390/ffitarget.h13
-rw-r--r--Modules/_ctypes/libffi/src/sh/ffi.c5
-rw-r--r--Modules/_ctypes/libffi/src/sh/ffitarget.h11
-rw-r--r--Modules/_ctypes/libffi/src/sh64/ffi.c5
-rw-r--r--Modules/_ctypes/libffi/src/sh64/ffitarget.h11
-rw-r--r--Modules/_ctypes/libffi/src/sparc/ffi.c66
-rw-r--r--Modules/_ctypes/libffi/src/sparc/ffitarget.h15
-rw-r--r--Modules/_ctypes/libffi/src/sparc/v9.S2
-rw-r--r--Modules/_ctypes/libffi/src/x86/ffi.c258
-rw-r--r--Modules/_ctypes/libffi/src/x86/ffi64.c36
-rw-r--r--Modules/_ctypes/libffi/src/x86/ffitarget.h49
-rw-r--r--Modules/_ctypes/libffi/src/x86/sysv.S66
-rw-r--r--Modules/_ctypes/libffi/src/x86/unix64.S4
-rw-r--r--Modules/_ctypes/libffi/src/x86/win32.S268
-rw-r--r--Modules/_ctypes/libffi/src/x86/win64.S22
-rw-r--r--Modules/_ctypes/libffi/testsuite/Makefile.am129
-rw-r--r--Modules/_ctypes/libffi/testsuite/Makefile.in174
-rw-r--r--Modules/_ctypes/libffi/testsuite/lib/libffi.exp (renamed from Modules/_ctypes/libffi/testsuite/lib/libffi-dg.exp)58
-rw-r--r--Modules/_ctypes/libffi/testsuite/lib/target-libpath.exp22
-rwxr-xr-xModules/_ctypes/libffi/testsuite/libffi.call/a.outbin0 -> 33895 bytes-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/call.exp6
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/closure_stdcall.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/closure_thiscall.c72
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_12byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_16byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_18byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_19byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_1_1byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_20byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_20byte1.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_24byte.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_2byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_3_1byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_3byte1.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_3byte2.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_4_1byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_4byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_5_1_byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_5byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_64byte.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_6_1_byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_6byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_7_1_byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_7byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_8byte.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_9byte1.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_9byte2.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_double.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_float.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_longdouble.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_longdouble_split.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_longdouble_split2.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_pointer.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_sint16.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_sint32.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_sint64.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_uint16.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_uint32.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_align_uint64.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_dbls_struct.c4
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_double_va.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_longdouble.c2
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_longdouble_va.c9
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_pointer.c2
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/cls_pointer_stack.c8
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/err_bad_abi.c5
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/err_bad_typedef.c7
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/fastthis1_win32.c50
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/fastthis2_win32.c50
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/fastthis3_win32.c56
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/ffitest.h41
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/float_va.c107
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/huge_struct.c60
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/many2.c54
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/many2_win32.c63
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/nested_struct.c12
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/nested_struct1.c14
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/nested_struct10.c12
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/nested_struct2.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/nested_struct3.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/nested_struct4.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/nested_struct5.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/nested_struct6.c12
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/nested_struct7.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/nested_struct8.c12
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/nested_struct9.c12
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/return_sc.c2
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/stret_large.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/stret_large2.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/stret_medium.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/stret_medium2.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/strlen2_win32.c44
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/struct1.c12
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/struct1_win32.c67
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/struct2.c10
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/struct2_win32.c67
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/struct3.c9
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/struct4.c13
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/struct5.c13
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/struct6.c14
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/struct7.c14
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/struct8.c13
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/struct9.c13
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.call/testclosure.c4
-rw-r--r--Modules/_ctypes/libffi/testsuite/libffi.special/special.exp4
-rw-r--r--Modules/_ctypes/stgdict.c12
-rw-r--r--Modules/_curses_panel.c122
-rw-r--r--Modules/_cursesmodule.c683
-rw-r--r--Modules/_datetimemodule.c503
-rw-r--r--Modules/_dbmmodule.c12
-rw-r--r--Modules/_decimal/README.txt46
-rw-r--r--Modules/_decimal/_decimal.c5663
-rw-r--r--Modules/_decimal/docstrings.h753
-rw-r--r--Modules/_decimal/libmpdec/README.txt90
-rw-r--r--Modules/_decimal/libmpdec/basearith.c658
-rw-r--r--Modules/_decimal/libmpdec/basearith.h215
-rw-r--r--Modules/_decimal/libmpdec/bits.h192
-rw-r--r--Modules/_decimal/libmpdec/constants.c132
-rw-r--r--Modules/_decimal/libmpdec/constants.h83
-rw-r--r--Modules/_decimal/libmpdec/context.c286
-rw-r--r--Modules/_decimal/libmpdec/convolute.c174
-rw-r--r--Modules/_decimal/libmpdec/convolute.h43
-rw-r--r--Modules/_decimal/libmpdec/crt.c179
-rw-r--r--Modules/_decimal/libmpdec/crt.h40
-rw-r--r--Modules/_decimal/libmpdec/difradix2.c173
-rw-r--r--Modules/_decimal/libmpdec/difradix2.h41
-rw-r--r--Modules/_decimal/libmpdec/fnt.c81
-rw-r--r--Modules/_decimal/libmpdec/fnt.h42
-rw-r--r--Modules/_decimal/libmpdec/fourstep.c257
-rw-r--r--Modules/_decimal/libmpdec/fourstep.h41
-rw-r--r--Modules/_decimal/libmpdec/io.c1575
-rw-r--r--Modules/_decimal/libmpdec/io.h59
-rw-r--r--Modules/_decimal/libmpdec/literature/REFERENCES.txt51
-rw-r--r--Modules/_decimal/libmpdec/literature/bignum.txt83
-rw-r--r--Modules/_decimal/libmpdec/literature/fnt.py208
-rw-r--r--Modules/_decimal/libmpdec/literature/matrix-transform.txt256
-rw-r--r--Modules/_decimal/libmpdec/literature/mulmod-64.txt127
-rw-r--r--Modules/_decimal/libmpdec/literature/mulmod-ppro.txt269
-rw-r--r--Modules/_decimal/libmpdec/literature/six-step.txt63
-rw-r--r--Modules/_decimal/libmpdec/literature/umodarith.lisp692
-rw-r--r--Modules/_decimal/libmpdec/memory.c292
-rw-r--r--Modules/_decimal/libmpdec/memory.h44
-rw-r--r--Modules/_decimal/libmpdec/mpdecimal.c8149
-rw-r--r--Modules/_decimal/libmpdec/mpdecimal.h800
-rw-r--r--Modules/_decimal/libmpdec/numbertheory.c132
-rw-r--r--Modules/_decimal/libmpdec/numbertheory.h71
-rw-r--r--Modules/_decimal/libmpdec/sixstep.c214
-rw-r--r--Modules/_decimal/libmpdec/sixstep.h41
-rw-r--r--Modules/_decimal/libmpdec/transpose.c276
-rw-r--r--Modules/_decimal/libmpdec/transpose.h55
-rw-r--r--Modules/_decimal/libmpdec/typearith.h669
-rw-r--r--Modules/_decimal/libmpdec/umodarith.h650
-rw-r--r--Modules/_decimal/libmpdec/vccompat.h62
-rw-r--r--Modules/_decimal/libmpdec/vcdiv64.asm48
-rw-r--r--Modules/_decimal/libmpdec/vcstdint.h232
-rw-r--r--Modules/_decimal/tests/README.txt15
-rw-r--r--Modules/_decimal/tests/bench.py135
-rw-r--r--Modules/_decimal/tests/bignum.py42
-rw-r--r--Modules/_decimal/tests/deccheck.py1075
-rw-r--r--Modules/_decimal/tests/formathelper.py344
-rw-r--r--Modules/_decimal/tests/randdec.py559
-rw-r--r--Modules/_decimal/tests/randfloat.py250
-rwxr-xr-xModules/_decimal/tests/runall-memorydebugger.sh175
-rwxr-xr-xModules/_decimal/tests/runall.bat111
-rw-r--r--Modules/_elementtree.c2101
-rw-r--r--Modules/_freeze_importlib.c142
-rw-r--r--Modules/_functoolsmodule.c210
-rw-r--r--Modules/_gestalt.c2
-rw-r--r--Modules/_hashopenssl.c8
-rw-r--r--Modules/_io/_iomodule.c166
-rw-r--r--Modules/_io/_iomodule.h17
-rw-r--r--Modules/_io/bufferedio.c318
-rw-r--r--Modules/_io/bytesio.c2
-rw-r--r--Modules/_io/fileio.c153
-rw-r--r--Modules/_io/iobase.c49
-rw-r--r--Modules/_io/stringio.c185
-rw-r--r--Modules/_io/textio.c683
-rw-r--r--Modules/_json.c439
-rw-r--r--Modules/_localemodule.c103
-rw-r--r--Modules/_lsprof.c45
-rw-r--r--Modules/_lzmamodule.c1286
-rw-r--r--Modules/_multiprocessing/connection.h527
-rw-r--r--Modules/_multiprocessing/multiprocessing.c241
-rw-r--r--Modules/_multiprocessing/multiprocessing.h80
-rw-r--r--Modules/_multiprocessing/pipe_connection.c149
-rw-r--r--Modules/_multiprocessing/semaphore.c59
-rw-r--r--Modules/_multiprocessing/socket_connection.c205
-rw-r--r--Modules/_multiprocessing/win32_functions.c267
-rw-r--r--Modules/_pickle.c306
-rw-r--r--Modules/_posixsubprocess.c22
-rw-r--r--Modules/_randommodule.c19
-rw-r--r--Modules/_sqlite/cache.c21
-rw-r--r--Modules/_sqlite/connection.c99
-rw-r--r--Modules/_sqlite/connection.h3
-rw-r--r--Modules/_sqlite/cursor.c31
-rw-r--r--Modules/_sqlite/microprotocols.c8
-rw-r--r--Modules/_sqlite/module.c19
-rw-r--r--Modules/_sqlite/module.h2
-rw-r--r--Modules/_sqlite/row.c10
-rw-r--r--Modules/_sqlite/statement.c12
-rw-r--r--Modules/_sqlite/statement.h4
-rw-r--r--Modules/_sre.c621
-rw-r--r--Modules/_ssl.c829
-rw-r--r--Modules/_ssl_data.h1653
-rw-r--r--Modules/_struct.c90
-rw-r--r--Modules/_testbuffer.c2875
-rw-r--r--Modules/_testcapimodule.c408
-rw-r--r--Modules/_threadmodule.c60
-rw-r--r--Modules/_time.c28
-rw-r--r--Modules/_time.h3
-rw-r--r--Modules/_tkinter.c102
-rw-r--r--Modules/_winapi.c1328
-rw-r--r--Modules/arraymodule.c200
-rw-r--r--Modules/atexitmodule.c10
-rw-r--r--Modules/audioop.c8
-rw-r--r--Modules/binascii.c58
-rw-r--r--Modules/bz2module.c2169
-rw-r--r--Modules/cjkcodecs/_codecs_cn.c14
-rw-r--r--Modules/cjkcodecs/_codecs_hk.c2
-rw-r--r--Modules/cjkcodecs/_codecs_iso2022.c2
-rw-r--r--Modules/cjkcodecs/_codecs_jp.c34
-rw-r--r--Modules/cjkcodecs/_codecs_kr.c18
-rw-r--r--Modules/cjkcodecs/_codecs_tw.c4
-rw-r--r--Modules/cjkcodecs/multibytecodec.c48
-rw-r--r--Modules/config.c.in3
-rw-r--r--Modules/errnomodule.c82
-rw-r--r--Modules/expat/COPYING1
-rw-r--r--Modules/expat/amigaconfig.h64
-rw-r--r--Modules/expat/ascii.h7
-rw-r--r--Modules/expat/expat.h29
-rw-r--r--Modules/expat/expat_external.h10
-rw-r--r--Modules/expat/internal.h2
-rw-r--r--Modules/expat/watcomconfig.h47
-rw-r--r--Modules/expat/xmlparse.c423
-rw-r--r--Modules/expat/xmlrole.c12
-rw-r--r--Modules/expat/xmltok.c22
-rw-r--r--Modules/expat/xmltok_impl.c6
-rw-r--r--Modules/expat/xmltok_ns.c9
-rw-r--r--Modules/faulthandler.c1152
-rw-r--r--Modules/fcntlmodule.c5
-rw-r--r--Modules/fpectlmodule.c11
-rw-r--r--Modules/gc_weakref.txt6
-rw-r--r--Modules/gcmodule.c98
-rw-r--r--Modules/getbuildinfo.c18
-rw-r--r--Modules/getpath.c118
-rw-r--r--Modules/itertoolsmodule.c919
-rw-r--r--Modules/main.c69
-rw-r--r--Modules/mathmodule.c87
-rw-r--r--Modules/md5module.c31
-rw-r--r--Modules/mmapmodule.c90
-rw-r--r--Modules/nismodule.c2
-rw-r--r--Modules/operator.c166
-rw-r--r--Modules/ossaudiodev.c101
-rw-r--r--Modules/parsermodule.c151
-rw-r--r--Modules/posixmodule.c6136
-rw-r--r--Modules/pyexpat.c52
-rw-r--r--Modules/python.c1
-rw-r--r--Modules/readline.c13
-rw-r--r--Modules/resource.c16
-rw-r--r--Modules/selectmodule.c493
-rw-r--r--Modules/sha1module.c30
-rw-r--r--Modules/sha256module.c19
-rw-r--r--Modules/sha512module.c19
-rw-r--r--Modules/signalmodule.c439
-rw-r--r--Modules/socketmodule.c1547
-rw-r--r--Modules/socketmodule.h27
-rw-r--r--Modules/sre.h10
-rw-r--r--Modules/syslogmodule.c22
-rw-r--r--Modules/termios.c5
-rw-r--r--Modules/testcapi_long.h29
-rw-r--r--Modules/timemodule.c950
-rw-r--r--Modules/tkappinit.c6
-rw-r--r--Modules/unicodedata.c306
-rw-r--r--Modules/unicodedata_db.h7148
-rw-r--r--Modules/unicodename_db.h38401
-rw-r--r--Modules/xxlimited.c3
-rw-r--r--Modules/zipimport.c667
-rw-r--r--Modules/zlibmodule.c158
-rw-r--r--Objects/abstract.c335
-rw-r--r--Objects/bytearrayobject.c375
-rw-r--r--Objects/bytes_methods.c12
-rw-r--r--Objects/bytesobject.c447
-rw-r--r--Objects/classobject.c23
-rw-r--r--Objects/codeobject.c134
-rw-r--r--Objects/complexobject.c53
-rw-r--r--Objects/descrobject.c272
-rw-r--r--Objects/dictnotes.txt237
-rw-r--r--Objects/dictobject.c1955
-rw-r--r--Objects/enumobject.c54
-rw-r--r--Objects/exceptions.c946
-rw-r--r--Objects/fileobject.c34
-rw-r--r--Objects/floatobject.c221
-rw-r--r--Objects/frameobject.c32
-rw-r--r--Objects/funcobject.c150
-rw-r--r--Objects/genobject.c204
-rw-r--r--Objects/iterobject.c47
-rw-r--r--Objects/listobject.c161
-rw-r--r--Objects/longobject.c441
-rw-r--r--Objects/memoryobject.c3217
-rw-r--r--Objects/methodobject.c56
-rw-r--r--Objects/moduleobject.c135
-rw-r--r--Objects/namespaceobject.c225
-rw-r--r--Objects/object.c550
-rw-r--r--Objects/obmalloc.c151
-rw-r--r--Objects/rangeobject.c237
-rw-r--r--Objects/setobject.c191
-rw-r--r--Objects/sliceobject.c71
-rw-r--r--Objects/stringlib/asciilib.h30
-rw-r--r--Objects/stringlib/codecs.h629
-rw-r--r--Objects/stringlib/count.h9
-rw-r--r--Objects/stringlib/eq.h23
-rw-r--r--Objects/stringlib/fastsearch.h75
-rw-r--r--Objects/stringlib/find.h89
-rw-r--r--Objects/stringlib/find_max_char.h133
-rw-r--r--Objects/stringlib/formatter.h1518
-rw-r--r--Objects/stringlib/localeutil.h100
-rw-r--r--Objects/stringlib/partition.h12
-rw-r--r--Objects/stringlib/split.h26
-rw-r--r--Objects/stringlib/stringdefs.h8
-rw-r--r--Objects/stringlib/ucs1lib.h31
-rw-r--r--Objects/stringlib/ucs2lib.h30
-rw-r--r--Objects/stringlib/ucs4lib.h30
-rw-r--r--Objects/stringlib/undef.h12
-rw-r--r--Objects/stringlib/unicode_format.h (renamed from Objects/stringlib/string_format.h)444
-rw-r--r--Objects/stringlib/unicodedefs.h8
-rw-r--r--Objects/tupleobject.c74
-rw-r--r--Objects/typeobject.c1129
-rw-r--r--Objects/typeslots.inc2
-rw-r--r--Objects/typeslots.py2
-rw-r--r--Objects/unicodectype.c125
-rw-r--r--Objects/unicodeobject.c12120
-rw-r--r--Objects/unicodetype_db.h4718
-rw-r--r--Objects/weakrefobject.c61
-rw-r--r--PC/VC6/_multiprocessing.dsp8
-rw-r--r--PC/VC6/pythoncore.dsp9
-rw-r--r--PC/VC6/readme.txt8
-rw-r--r--PC/VS7.1/pythoncore.vcproj51
-rw-r--r--PC/VS7.1/readme.txt4
-rw-r--r--PC/VS8.0/_multiprocessing.vcproj12
-rw-r--r--PC/VS8.0/build_ssl.bat4
-rw-r--r--PC/VS8.0/kill_python.c2
-rw-r--r--PC/VS8.0/pyproject.vsprops4
-rw-r--r--PC/VS8.0/pythoncore.vcproj4
-rw-r--r--PC/VS9.0/_bz2.vcproj (renamed from PCbuild/bz2.vcproj)6
-rw-r--r--PC/VS9.0/_ctypes.vcproj (renamed from PCbuild/_ctypes.vcproj)48
-rw-r--r--PC/VS9.0/_ctypes_test.vcproj (renamed from PCbuild/_ctypes_test.vcproj)6
-rw-r--r--PC/VS9.0/_decimal.vcproj743
-rw-r--r--PC/VS9.0/_elementtree.vcproj (renamed from PCbuild/_elementtree.vcproj)56
-rw-r--r--PC/VS9.0/_hashlib.vcproj (renamed from PCbuild/_hashlib.vcproj)4
-rw-r--r--PC/VS9.0/_lzma.vcproj537
-rw-r--r--PC/VS9.0/_msi.vcproj (renamed from PCbuild/_msi.vcproj)4
-rw-r--r--PC/VS9.0/_multiprocessing.vcproj (renamed from PCbuild/_multiprocessing.vcproj)24
-rw-r--r--PC/VS9.0/_socket.vcproj (renamed from PCbuild/_socket.vcproj)6
-rw-r--r--PC/VS9.0/_sqlite3.vcproj (renamed from PCbuild/_sqlite3.vcproj)38
-rw-r--r--PC/VS9.0/_ssl.vcproj (renamed from PCbuild/_ssl.vcproj)4
-rw-r--r--PC/VS9.0/_testbuffer.vcproj521
-rw-r--r--PC/VS9.0/_testcapi.vcproj (renamed from PCbuild/_testcapi.vcproj)4
-rw-r--r--PC/VS9.0/_tkinter.vcproj (renamed from PCbuild/_tkinter.vcproj)6
-rw-r--r--PC/VS9.0/bdist_wininst.vcproj (renamed from PCbuild/bdist_wininst.vcproj)44
-rw-r--r--PC/VS9.0/debug.vsprops (renamed from PCbuild/debug.vsprops)0
-rw-r--r--PC/VS9.0/kill_python.c178
-rw-r--r--PC/VS9.0/kill_python.vcproj (renamed from PCbuild/kill_python.vcproj)0
-rw-r--r--PC/VS9.0/make_buildinfo.c195
-rw-r--r--PC/VS9.0/make_buildinfo.vcproj (renamed from PCbuild/make_buildinfo.vcproj)2
-rw-r--r--PC/VS9.0/make_versioninfo.vcproj (renamed from PCbuild/make_versioninfo.vcproj)28
-rw-r--r--PC/VS9.0/pcbuild.sln658
-rw-r--r--PC/VS9.0/pginstrument.vsprops (renamed from PCbuild/pginstrument.vsprops)0
-rw-r--r--PC/VS9.0/pgupdate.vsprops (renamed from PCbuild/pgupdate.vsprops)0
-rw-r--r--PC/VS9.0/pyd.vsprops (renamed from PCbuild/pyd.vsprops)0
-rw-r--r--PC/VS9.0/pyd_d.vsprops (renamed from PCbuild/pyd_d.vsprops)0
-rw-r--r--PC/VS9.0/pyexpat.vcproj (renamed from PCbuild/pyexpat.vcproj)30
-rw-r--r--PC/VS9.0/pyproject.vsprops (renamed from PCbuild/pyproject.vsprops)16
-rw-r--r--PC/VS9.0/python.vcproj (renamed from PCbuild/python.vcproj)12
-rw-r--r--PC/VS9.0/python3dll.vcproj (renamed from PCbuild/python3dll.vcproj)44
-rw-r--r--PC/VS9.0/pythoncore.vcproj (renamed from PCbuild/pythoncore.vcproj)640
-rw-r--r--PC/VS9.0/pythonw.vcproj (renamed from PCbuild/pythonw.vcproj)6
-rw-r--r--PC/VS9.0/release.vsprops (renamed from PCbuild/release.vsprops)0
-rw-r--r--PC/VS9.0/select.vcproj (renamed from PCbuild/select.vcproj)4
-rw-r--r--PC/VS9.0/sqlite3.vcproj (renamed from PCbuild/sqlite3.vcproj)2
-rw-r--r--PC/VS9.0/sqlite3.vsprops (renamed from PCbuild/sqlite3.vsprops)0
-rw-r--r--PC/VS9.0/ssl.vcproj (renamed from PCbuild/ssl.vcproj)2
-rw-r--r--PC/VS9.0/unicodedata.vcproj (renamed from PCbuild/unicodedata.vcproj)8
-rw-r--r--PC/VS9.0/w9xpopen.vcproj (renamed from PCbuild/w9xpopen.vcproj)4
-rw-r--r--PC/VS9.0/winsound.vcproj (renamed from PCbuild/winsound.vcproj)4
-rw-r--r--PC/VS9.0/x64.vsprops (renamed from PCbuild/x64.vsprops)0
-rw-r--r--PC/VS9.0/xxlimited.vcproj (renamed from PCbuild/xxlimited.vcproj)4
-rw-r--r--PC/_msi.c18
-rw-r--r--PC/_subprocess.c690
-rw-r--r--PC/config.c11
-rw-r--r--PC/dl_nt.c10
-rw-r--r--PC/example_nt/example.vcproj4
-rw-r--r--PC/frozen_dllmain.c6
-rw-r--r--PC/getpathp.c87
-rw-r--r--PC/import_nt.c86
-rw-r--r--PC/launcher.c1365
-rw-r--r--PC/launcher.icobin0 -> 19790 bytes
-rwxr-xr-xPC/msvcrtmodule.c27
-rw-r--r--PC/os2emx/Makefile2
-rw-r--r--PC/os2emx/README.os2emx2
-rw-r--r--PC/os2emx/config.c3
-rw-r--r--PC/os2emx/pyconfig.h2
-rw-r--r--PC/os2emx/python33.def (renamed from PC/os2emx/python27.def)250
-rw-r--r--PC/os2vacpp/config.c3
-rw-r--r--PC/pyconfig.h20
-rw-r--r--PC/pylauncher.rc51
-rw-r--r--PC/python3.def1384
-rw-r--r--PC/python3.mak10
-rw-r--r--PC/python33gen.py (renamed from PC/python32gen.py)13
-rw-r--r--PC/python33stub.def (renamed from PC/python32stub.def)11
-rw-r--r--PC/python_nt.rc2
-rw-r--r--PC/readme.txt2
-rw-r--r--PC/winreg.c86
-rw-r--r--PC/winsound.c2
-rw-r--r--PCbuild/_bz2.vcxproj261
-rw-r--r--PCbuild/_bz2.vcxproj.filters48
-rw-r--r--PCbuild/_ctypes.vcxproj294
-rw-r--r--PCbuild/_ctypes.vcxproj.filters65
-rw-r--r--PCbuild/_ctypes_test.vcxproj192
-rw-r--r--PCbuild/_ctypes_test.vcxproj.filters21
-rw-r--r--PCbuild/_decimal.vcxproj308
-rw-r--r--PCbuild/_decimal.vcxproj.filters116
-rw-r--r--PCbuild/_elementtree.vcxproj270
-rw-r--r--PCbuild/_elementtree.vcxproj.filters72
-rw-r--r--PCbuild/_freeze_importlib.vcxproj188
-rw-r--r--PCbuild/_freeze_importlib.vcxproj.filters27
-rw-r--r--PCbuild/_hashlib.vcxproj282
-rw-r--r--PCbuild/_hashlib.vcxproj.filters13
-rw-r--r--PCbuild/_lzma.vcxproj250
-rw-r--r--PCbuild/_lzma.vcxproj.filters13
-rw-r--r--PCbuild/_msi.vcxproj226
-rw-r--r--PCbuild/_msi.vcxproj.filters13
-rw-r--r--PCbuild/_multiprocessing.vcxproj230
-rw-r--r--PCbuild/_multiprocessing.vcxproj.filters24
-rw-r--r--PCbuild/_socket.vcxproj229
-rw-r--r--PCbuild/_socket.vcxproj.filters21
-rw-r--r--PCbuild/_sqlite3.vcxproj274
-rw-r--r--PCbuild/_sqlite3.vcxproj.filters72
-rw-r--r--PCbuild/_ssl.vcxproj286
-rw-r--r--PCbuild/_ssl.vcxproj.filters13
-rw-r--r--PCbuild/_testbuffer.vcxproj217
-rw-r--r--PCbuild/_testbuffer.vcxproj.filters13
-rw-r--r--PCbuild/_testcapi.vcxproj218
-rw-r--r--PCbuild/_testcapi.vcxproj.filters13
-rw-r--r--PCbuild/_tkinter.vcxproj251
-rw-r--r--PCbuild/_tkinter.vcxproj.filters16
-rw-r--r--PCbuild/bdist_wininst.vcxproj158
-rw-r--r--PCbuild/bdist_wininst.vcxproj.filters61
-rw-r--r--PCbuild/build.bat7
-rw-r--r--PCbuild/build_ssl.bat4
-rw-r--r--PCbuild/build_ssl.py8
-rw-r--r--PCbuild/build_tkinter.py6
-rw-r--r--PCbuild/debug.props27
-rw-r--r--PCbuild/env.bat8
-rw-r--r--PCbuild/kill_python.c2
-rw-r--r--PCbuild/kill_python.vcxproj120
-rw-r--r--PCbuild/kill_python.vcxproj.filters13
-rw-r--r--PCbuild/make_buildinfo.c67
-rw-r--r--PCbuild/make_buildinfo.vcxproj52
-rw-r--r--PCbuild/make_buildinfo.vcxproj.filters14
-rw-r--r--PCbuild/make_versioninfo.vcxproj200
-rw-r--r--PCbuild/make_versioninfo.vcxproj.filters13
-rw-r--r--PCbuild/pcbuild.sln273
-rw-r--r--PCbuild/pginstrument.props38
-rw-r--r--PCbuild/pgupdate.props17
-rw-r--r--PCbuild/pyd.props25
-rw-r--r--PCbuild/pyd_d.props31
-rw-r--r--PCbuild/pyexpat.vcxproj237
-rw-r--r--PCbuild/pyexpat.vcxproj.filters33
-rw-r--r--PCbuild/pylauncher.vcxproj311
-rw-r--r--PCbuild/pylauncher.vcxproj.filters32
-rw-r--r--PCbuild/pyproject.props105
-rw-r--r--PCbuild/python.vcxproj368
-rw-r--r--PCbuild/python.vcxproj.filters26
-rw-r--r--PCbuild/python3dll.vcxproj184
-rw-r--r--PCbuild/python3dll.vcxproj.filters32
-rw-r--r--PCbuild/pythoncore.vcxproj680
-rw-r--r--PCbuild/pythoncore.vcxproj.filters925
-rw-r--r--PCbuild/pythonw.vcxproj346
-rw-r--r--PCbuild/pythonw.vcxproj.filters21
-rw-r--r--PCbuild/pywlauncher.vcxproj246
-rw-r--r--PCbuild/pywlauncher.vcxproj.filters32
-rw-r--r--PCbuild/readme.txt37
-rw-r--r--PCbuild/release.props19
-rw-r--r--PCbuild/select.vcxproj234
-rw-r--r--PCbuild/select.vcxproj.filters13
-rw-r--r--PCbuild/sqlite3.props16
-rw-r--r--PCbuild/sqlite3.vcxproj240
-rw-r--r--PCbuild/sqlite3.vcxproj.filters24
-rw-r--r--PCbuild/ssl.vcxproj221
-rw-r--r--PCbuild/unicodedata.vcxproj222
-rw-r--r--PCbuild/unicodedata.vcxproj.filters24
-rw-r--r--PCbuild/vs9to10.py56
-rw-r--r--PCbuild/w9xpopen.vcxproj287
-rw-r--r--PCbuild/w9xpopen.vcxproj.filters13
-rw-r--r--PCbuild/winsound.vcxproj218
-rw-r--r--PCbuild/winsound.vcxproj.filters14
-rw-r--r--PCbuild/x64.props20
-rw-r--r--PCbuild/xxlimited.vcxproj194
-rw-r--r--PCbuild/xxlimited.vcxproj.filters13
-rw-r--r--Parser/Python.asdl203
-rw-r--r--Parser/asdl.py35
-rwxr-xr-xParser/asdl_c.py111
-rw-r--r--Parser/intrcheck.c174
-rw-r--r--Parser/myreadline.c37
-rw-r--r--Parser/node.c2
-rw-r--r--Parser/parsetok.c64
-rw-r--r--Parser/parsetok_pgen.c2
-rw-r--r--Parser/pgenmain.c2
-rw-r--r--Parser/tokenizer.c100
-rw-r--r--Parser/tokenizer.h9
-rw-r--r--Python/Python-ast.c1277
-rw-r--r--Python/_warnings.c97
-rw-r--r--Python/asdl.c4
-rw-r--r--Python/ast.c925
-rw-r--r--Python/bltinmodule.c211
-rw-r--r--Python/ceval.c864
-rw-r--r--Python/ceval_gil.h227
-rw-r--r--Python/codecs.c230
-rw-r--r--Python/compile.c337
-rw-r--r--Python/condvar.h390
-rw-r--r--Python/dtoa.c54
-rw-r--r--Python/dynload_aix.c19
-rw-r--r--Python/dynload_dl.c16
-rw-r--r--Python/dynload_hpux.c20
-rw-r--r--Python/dynload_next.c10
-rw-r--r--Python/dynload_os2.c8
-rw-r--r--Python/dynload_shlib.c43
-rw-r--r--Python/dynload_stub.c4
-rw-r--r--Python/dynload_win.c72
-rw-r--r--Python/errors.c165
-rw-r--r--Python/fileutils.c88
-rw-r--r--Python/formatter_unicode.c1534
-rw-r--r--Python/frozen.c3
-rw-r--r--Python/getargs.c425
-rw-r--r--Python/getcopyright.c2
-rw-r--r--Python/graminit.c190
-rw-r--r--Python/import.c3336
-rw-r--r--Python/importdl.c87
-rw-r--r--Python/importdl.h26
-rw-r--r--Python/importlib.h4404
-rwxr-xr-xPython/makeopcodetargets.py3
-rw-r--r--Python/marshal.c74
-rw-r--r--Python/modsupport.c16
-rw-r--r--Python/opcode_targets.h2
-rw-r--r--Python/peephole.c189
-rw-r--r--Python/pyarena.c8
-rw-r--r--Python/pystate.c49
-rw-r--r--Python/pystrtod.c41
-rw-r--r--Python/pythonrun.c344
-rw-r--r--Python/pytime.c203
-rw-r--r--Python/random.c11
-rw-r--r--Python/symtable.c166
-rw-r--r--Python/sysmodule.c211
-rw-r--r--Python/thread.c126
-rw-r--r--Python/thread_cthread.h112
-rw-r--r--Python/thread_lwp.h113
-rw-r--r--Python/thread_nt.h104
-rw-r--r--Python/thread_pth.h3
-rw-r--r--Python/thread_pthread.h25
-rw-r--r--Python/thread_sgi.h259
-rw-r--r--Python/thread_solaris.h130
-rw-r--r--Python/thread_wince.h136
-rw-r--r--Python/traceback.c294
-rw-r--r--README30
-rw-r--r--Tools/README12
-rw-r--r--Tools/buildbot/build-amd64.bat8
-rw-r--r--Tools/buildbot/build.bat7
-rw-r--r--Tools/buildbot/buildmsi.bat4
-rw-r--r--Tools/buildbot/clean-amd64.bat6
-rw-r--r--Tools/buildbot/clean.bat6
-rw-r--r--Tools/buildbot/external-amd64.bat12
-rw-r--r--Tools/buildbot/external-common.bat26
-rw-r--r--Tools/buildbot/external.bat12
-rw-r--r--Tools/ccbench/ccbench.py119
-rwxr-xr-xTools/demo/life.py89
-rwxr-xr-xTools/demo/ss1.py1
-rw-r--r--Tools/gdb/libpython.py209
-rw-r--r--Tools/hg/hgtouch.py103
-rwxr-xr-xTools/i18n/pygettext.py1
-rw-r--r--Tools/importbench/README6
-rw-r--r--Tools/importbench/importbench.py252
-rw-r--r--Tools/iobench/iobench.py17
-rw-r--r--Tools/msi/msi.py268
-rw-r--r--Tools/msi/msilib.py18
-rw-r--r--Tools/msi/uuids.py99
-rw-r--r--Tools/parser/test_unparse.py31
-rw-r--r--Tools/parser/unparse.py39
-rwxr-xr-xTools/pybench/pybench.py28
-rw-r--r--Tools/scripts/README4
-rwxr-xr-xTools/scripts/diff.py16
-rwxr-xr-xTools/scripts/findnocoding.py23
-rwxr-xr-xTools/scripts/highlight.py260
-rwxr-xr-xTools/scripts/import_diagnostics.py37
-rwxr-xr-xTools/scripts/patchcheck.py53
-rwxr-xr-xTools/scripts/pysource.py14
-rwxr-xr-xTools/scripts/pyvenv11
-rwxr-xr-xTools/scripts/reindent.py18
-rwxr-xr-xTools/scripts/run_tests.py51
-rwxr-xr-xTools/scripts/texi2html.py10
-rw-r--r--Tools/ssl/make_ssl_data.py68
-rw-r--r--Tools/stringbench/README68
-rwxr-xr-xTools/stringbench/stringbench.py1482
-rw-r--r--Tools/unicode/comparecodecs.py2
-rw-r--r--Tools/unicode/gencodec.py9
-rw-r--r--Tools/unicode/makeunicodedata.py379
-rw-r--r--Tools/unittestgui/unittestgui.py1
-rwxr-xr-xconfig.guess1530
-rwxr-xr-xconfig.sub1773
-rwxr-xr-xconfigure1857
-rw-r--r--configure.ac802
-rw-r--r--pyconfig.h.in234
-rw-r--r--setup.py481
1688 files changed, 340558 insertions, 84434 deletions
diff --git a/.bzrignore b/.bzrignore
index 959a7df..897084d 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -33,7 +33,6 @@ Modules/Setup.local
Modules/config.c
Modules/ld_so_aix
Parser/pgen
-Parser/pgen.stamp
Lib/test/data/*
Lib/lib2to3/Grammar*.pickle
Lib/lib2to3/PatternGrammar*.pickle
diff --git a/.gitignore b/.gitignore
index c1a8055..a88b36e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ Doc/tools/jinja2/
Doc/tools/pygments/
Doc/tools/sphinx/
Lib/lib2to3/*.pickle
+Lib/_sysconfigdata.py
Lib/plat-mac/errors.rsrc.df.rsrc
Makefile
Makefile.pre
@@ -37,7 +38,6 @@ PCbuild/Win32-temp-*
PCbuild/amd64/
.purify
Parser/pgen
-Parser/pgen.stamp
__pycache__
autom4te.cache
build/
diff --git a/.hgeol b/.hgeol
index ed13171..64bb7cf 100644
--- a/.hgeol
+++ b/.hgeol
@@ -28,8 +28,11 @@
Lib/email/test/data/msg_26.txt = BIN
Lib/test/cjkencodings/* = BIN
-Lib/test/sndhdrdata/sndhdr.* = BIN
Lib/test/decimaltestdata/*.decTest = BIN
+Lib/test/sndhdrdata/sndhdr.* = BIN
+Lib/test/test_email/data/msg_26.txt = BIN
+
+Lib/venv/scripts/nt/* = BIN
# All other files (which presumably are human-editable) are "native".
# This must be the last rule!
diff --git a/.hgignore b/.hgignore
index 24df3b9..a01a178 100644
--- a/.hgignore
+++ b/.hgignore
@@ -5,7 +5,8 @@ Makefile$
Makefile.pre$
TAGS$
autom4te.cache$
-build/
+^build/
+^Doc/build/
buildno$
config.cache
config.log
@@ -31,7 +32,6 @@ Modules/Setup.local
Modules/config.c
Modules/ld_so_aix$
Parser/pgen$
-Parser/pgen.stamp$
^core
^python-gdb.py
^python.exe-gdb.py
@@ -47,6 +47,7 @@ libpython*.so*
*.pyd
*.cover
*~
+Lib/_sysconfigdata.py
Lib/lib2to3/*.pickle
Lib/test/data/*
Misc/*.wpu
@@ -54,6 +55,18 @@ PC/python_nt*.h
PC/pythonnt_rc*.h
PC/*.obj
PC/*.exe
+PC/*/*.exe
+PC/*/*.exp
+PC/*/*.lib
+PC/*/*.bsc
+PC/*/*.dll
+PC/*/*.pdb
+PC/*/*.user
+PC/*/*.ncb
+PC/*/*.suo
+PC/*/Win32-temp-*
+PC/*/x64-temp-*
+PC/*/amd64
PCbuild/*.exe
PCbuild/*.dll
PCbuild/*.pdb
@@ -62,8 +75,15 @@ PCbuild/*.exp
PCbuild/*.o
PCbuild/*.ncb
PCbuild/*.bsc
+PCbuild/*.user
+PCbuild/*.suo
+PCbuild/*.*sdf
PCbuild/Win32-temp-*
+PCbuild/x64-temp-*
+PCbuild/amd64
+BuildLog.htm
__pycache__
+Modules/_freeze_importlib
Modules/_testembed
.coverage
coverage/
diff --git a/.hgtags b/.hgtags
index f061b90..efa4d2e 100644
--- a/.hgtags
+++ b/.hgtags
@@ -99,3 +99,13 @@ c860feaa348d663e598986894ee4680480577e15 v3.2.2rc1
7085403daf439adb3f9e70ef13f6bedb1c447376 v3.2.3rc1
428f05cb7277e1d42bb9dd8d1af6b6270ebc6112 v3.2.3rc2
3d0686d90f55a78f96d9403da2c52dc2411419d0 v3.2.3
+f1a9a6505731714f0e157453ff850e3b71615c45 v3.3.0a1
+2f69db52d6de306cdaef0a0cc00cc823fb350b01 v3.3.0a2
+0b53b70a40a00013505eb35e3660057b62be77be v3.3.0a3
+7c51388a3aa7ce76a8541bbbdfc05d2d259a162c v3.3.0a4
+e15c554cd43eb23bc0a528a4e8741da9bbec9607 v3.3.0b1
+4972a8f1b2aa3d7cdd64dc96aa7fa112fe1ea343 v3.3.0b2
+8bb5c7bc46ba43804480f3e328e1fa956672c885 v3.3.0rc1
+88a0792e8ba3e4916b24c7e7a522c277d326d66e v3.3.0rc2
+c191d21cefafb3832c45570e84854e309aa62eaa v3.3.0rc3
+bd8afb90ebf28ba4edc901d4a235f75e7bbc79fd v3.3.0
diff --git a/.hgtouch b/.hgtouch
new file mode 100644
index 0000000..ad936fa
--- /dev/null
+++ b/.hgtouch
@@ -0,0 +1,12 @@
+# -*- Makefile -*-
+# Define dependencies of generated files that are checked into hg.
+# The syntax of this file uses make rule dependencies, without actions
+
+Python/importlib.h: Lib/importlib/_bootstrap.py Python/freeze_importlib.py
+
+Include/ast.h: Parser/Python.asdl Parser/asdl.py Parser/asdl_c.py
+Python/Python-ast.c: Include/ast.h
+
+Python/opcode_targets.h: Python/makeopcodetargets.py Lib/opcode.py
+
+Objects/typeslots.inc: Include/typeslots.h Objects/typeslots.py
\ No newline at end of file
diff --git a/Doc/Makefile b/Doc/Makefile
index cb56ea9..6a804d4 100644
--- a/Doc/Makefile
+++ b/Doc/Makefile
@@ -53,7 +53,7 @@ checkout:
fi
@if [ ! -d tools/pygments ]; then \
echo "Checking out Pygments..."; \
- svn checkout $(SVNROOT)/external/Pygments-1.3.1/pygments tools/pygments; \
+ svn checkout $(SVNROOT)/external/Pygments-1.5dev-20120930/pygments tools/pygments; \
fi
update: clean checkout
diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst
index d4dda7c..b28aa5f 100644
--- a/Doc/c-api/arg.rst
+++ b/Doc/c-api/arg.rst
@@ -146,7 +146,7 @@ Unless otherwise stated, buffers are not NUL-terminated.
Like ``u#``, but the Python object may also be ``None``, in which case the
:c:type:`Py_UNICODE` pointer is set to *NULL*.
-``U`` (:class:`str`) [PyUnicodeObject \*]
+``U`` (:class:`str`) [PyObject \*]
Requires that the Python object is a Unicode object, without attempting
any conversion. Raises :exc:`TypeError` if the object is not a Unicode
object. The C variable may also be declared as :c:type:`PyObject\*`.
@@ -260,9 +260,12 @@ Numbers
``n`` (:class:`int`) [Py_ssize_t]
Convert a Python integer to a C :c:type:`Py_ssize_t`.
-``c`` (:class:`bytes` of length 1) [char]
- Convert a Python byte, represented as a :class:`bytes` object of length 1,
- to a C :c:type:`char`.
+``c`` (:class:`bytes` or :class:`bytearray` of length 1) [char]
+ Convert a Python byte, represented as a :class:`bytes` or
+ :class:`bytearray` object of length 1, to a C :c:type:`char`.
+
+ .. versionchanged:: 3.3
+ Allow :class:`bytearray` objects.
``C`` (:class:`str` of length 1) [int]
Convert a Python character, represented as a :class:`str` object of
@@ -315,6 +318,15 @@ Other objects
.. versionchanged:: 3.1
``Py_CLEANUP_SUPPORTED`` was added.
+``p`` (:class:`bool`) [int]
+ Tests the value passed in for truth (a boolean **p**\redicate) and converts
+ the result to its equivalent C true/false integer value.
+ Sets the int to 1 if the expression was true and 0 if it was false.
+ This accepts any valid Python value. See :ref:`truth` for more
+ information about how Python tests values for truth.
+
+ .. versionadded:: 3.3
+
``(items)`` (:class:`tuple`) [*matching-items*]
The object must be a Python sequence whose length is the number of format units
in *items*. The C arguments must correspond to the individual format units in
@@ -336,6 +348,15 @@ inside nested parentheses. They are:
:c:func:`PyArg_ParseTuple` does not touch the contents of the corresponding C
variable(s).
+``$``
+ :c:func:`PyArg_ParseTupleAndKeywords` only:
+ Indicates that the remaining arguments in the Python argument list are
+ keyword-only. Currently, all keyword-only arguments must also be optional
+ arguments, so ``|`` must always be specified before ``$`` in the format
+ string.
+
+ .. versionadded:: 3.3
+
``:``
The list of format units ends here; the string after the colon is used as the
function name in error messages (the "associated value" of the exception that
diff --git a/Doc/c-api/buffer.rst b/Doc/c-api/buffer.rst
index d98ece3..d636935 100644
--- a/Doc/c-api/buffer.rst
+++ b/Doc/c-api/buffer.rst
@@ -7,6 +7,7 @@ Buffer Protocol
.. sectionauthor:: Greg Stein <gstein@lyra.org>
.. sectionauthor:: Benjamin Peterson
+.. sectionauthor:: Stefan Krah
.. index::
@@ -20,7 +21,7 @@ as image processing or numeric analysis.
While each of these types have their own semantics, they share the common
characteristic of being backed by a possibly large memory buffer. It is
-then desireable, in some situations, to access that buffer directly and
+then desirable, in some situations, to access that buffer directly and
without intermediate copying.
Python provides such a facility at the C level in the form of the *buffer
@@ -60,8 +61,10 @@ isn't needed anymore. Failure to do so could lead to various issues such as
resource leaks.
-The buffer structure
-====================
+.. _buffer-structure:
+
+Buffer structure
+================
Buffer structures (or simply "buffers") are useful as a way to expose the
binary data from another object to the Python programmer. They can also be
@@ -78,249 +81,411 @@ allows them to be created and copied very simply. When a generic wrapper
around a buffer is needed, a :ref:`memoryview <memoryview-objects>` object
can be created.
+For short instructions on how to write an exporting object, see
+:ref:`Buffer Object Structures <buffer-structs>`. For obtaining
+a buffer, see :c:func:`PyObject_GetBuffer`.
.. c:type:: Py_buffer
- .. c:member:: void *buf
+ .. c:member:: void \*obj
+
+ A new reference to the exporting object. The reference is owned by
+ the consumer and automatically decremented and set to *NULL* by
+ :c:func:`PyBuffer_Release`. The field is the equivalent of the return
+ value of any standard C-API function.
+
+ As a special case, for *temporary* buffers that are wrapped by
+ :c:func:`PyMemoryView_FromBuffer` or :c:func:`PyBuffer_FillInfo`
+ this field is *NULL*. In general, exporting objects MUST NOT
+ use this scheme.
- A pointer to the start of the memory for the object.
+ .. c:member:: void \*buf
+
+ A pointer to the start of the logical structure described by the buffer
+ fields. This can be any location within the underlying physical memory
+ block of the exporter. For example, with negative :c:member:`~Py_buffer.strides`
+ the value may point to the end of the memory block.
+
+ For contiguous arrays, the value points to the beginning of the memory
+ block.
.. c:member:: Py_ssize_t len
- :noindex:
- The total length of the memory in bytes.
+ ``product(shape) * itemsize``. For contiguous arrays, this is the length
+ of the underlying memory block. For non-contiguous arrays, it is the length
+ that the logical structure would have if it were copied to a contiguous
+ representation.
+
+ Accessing ``((char *)buf)[0]`` up to ``((char *)buf)[len-1]`` is only valid
+ if the buffer has been obtained by a request that guarantees contiguity. In
+ most cases such a request will be :c:macro:`PyBUF_SIMPLE` or :c:macro:`PyBUF_WRITABLE`.
.. c:member:: int readonly
- An indicator of whether the buffer is read only.
+ An indicator of whether the buffer is read-only. This field is controlled
+ by the :c:macro:`PyBUF_WRITABLE` flag.
+
+ .. c:member:: Py_ssize_t itemsize
+
+ Item size in bytes of a single element. Same as the value of :func:`struct.calcsize`
+ called on non-NULL :c:member:`~Py_buffer.format` values.
+
+ Important exception: If a consumer requests a buffer without the
+ :c:macro:`PyBUF_FORMAT` flag, :c:member:`~Py_buffer.format` will
+ be set to *NULL*, but :c:member:`~Py_buffer.itemsize` still has
+ the value for the original format.
+
+ If :c:member:`~Py_buffer.shape` is present, the equality
+ ``product(shape) * itemsize == len`` still holds and the consumer
+ can use :c:member:`~Py_buffer.itemsize` to navigate the buffer.
+
+ If :c:member:`~Py_buffer.shape` is *NULL* as a result of a :c:macro:`PyBUF_SIMPLE`
+ or a :c:macro:`PyBUF_WRITABLE` request, the consumer must disregard
+ :c:member:`~Py_buffer.itemsize` and assume ``itemsize == 1``.
- .. c:member:: const char *format
- :noindex:
+ .. c:member:: const char \*format
- A *NULL* terminated string in :mod:`struct` module style syntax giving
- the contents of the elements available through the buffer. If this is
- *NULL*, ``"B"`` (unsigned bytes) is assumed.
+ A *NUL* terminated string in :mod:`struct` module style syntax describing
+ the contents of a single item. If this is *NULL*, ``"B"`` (unsigned bytes)
+ is assumed.
+
+ This field is controlled by the :c:macro:`PyBUF_FORMAT` flag.
.. c:member:: int ndim
- The number of dimensions the memory represents as a multi-dimensional
- array. If it is 0, :c:data:`strides` and :c:data:`suboffsets` must be
- *NULL*.
-
- .. c:member:: Py_ssize_t *shape
-
- An array of :c:type:`Py_ssize_t`\s the length of :c:data:`ndim` giving the
- shape of the memory as a multi-dimensional array. Note that
- ``((*shape)[0] * ... * (*shape)[ndims-1])*itemsize`` should be equal to
- :c:data:`len`.
-
- .. c:member:: Py_ssize_t *strides
-
- An array of :c:type:`Py_ssize_t`\s the length of :c:data:`ndim` giving the
- number of bytes to skip to get to a new element in each dimension.
-
- .. c:member:: Py_ssize_t *suboffsets
-
- An array of :c:type:`Py_ssize_t`\s the length of :c:data:`ndim`. If these
- suboffset numbers are greater than or equal to 0, then the value stored
- along the indicated dimension is a pointer and the suboffset value
- dictates how many bytes to add to the pointer after de-referencing. A
- suboffset value that it negative indicates that no de-referencing should
- occur (striding in a contiguous memory block).
-
- Here is a function that returns a pointer to the element in an N-D array
- pointed to by an N-dimensional index when there are both non-NULL strides
- and suboffsets::
-
- void *get_item_pointer(int ndim, void *buf, Py_ssize_t *strides,
- Py_ssize_t *suboffsets, Py_ssize_t *indices) {
- char *pointer = (char*)buf;
- int i;
- for (i = 0; i < ndim; i++) {
- pointer += strides[i] * indices[i];
- if (suboffsets[i] >=0 ) {
- pointer = *((char**)pointer) + suboffsets[i];
- }
- }
- return (void*)pointer;
- }
+ The number of dimensions the memory represents as an n-dimensional array.
+ If it is 0, :c:member:`~Py_buffer.buf` points to a single item representing
+ a scalar. In this case, :c:member:`~Py_buffer.shape`, :c:member:`~Py_buffer.strides`
+ and :c:member:`~Py_buffer.suboffsets` MUST be *NULL*.
+ The macro :c:macro:`PyBUF_MAX_NDIM` limits the maximum number of dimensions
+ to 64. Exporters MUST respect this limit, consumers of multi-dimensional
+ buffers SHOULD be able to handle up to :c:macro:`PyBUF_MAX_NDIM` dimensions.
- .. c:member:: Py_ssize_t itemsize
+ .. c:member:: Py_ssize_t \*shape
+
+ An array of :c:type:`Py_ssize_t` of length :c:member:`~Py_buffer.ndim`
+ indicating the shape of the memory as an n-dimensional array. Note that
+ ``shape[0] * ... * shape[ndim-1] * itemsize`` MUST be equal to
+ :c:member:`~Py_buffer.len`.
+
+ Shape values are restricted to ``shape[n] >= 0``. The case
+ ``shape[n] == 0`` requires special attention. See `complex arrays`_
+ for further information.
+
+ The shape array is read-only for the consumer.
+
+ .. c:member:: Py_ssize_t \*strides
+
+ An array of :c:type:`Py_ssize_t` of length :c:member:`~Py_buffer.ndim`
+ giving the number of bytes to skip to get to a new element in each
+ dimension.
+
+ Stride values can be any integer. For regular arrays, strides are
+ usually positive, but a consumer MUST be able to handle the case
+ ``strides[n] <= 0``. See `complex arrays`_ for further information.
+
+ The strides array is read-only for the consumer.
+
+ .. c:member:: Py_ssize_t \*suboffsets
+
+ An array of :c:type:`Py_ssize_t` of length :c:member:`~Py_buffer.ndim`.
+ If ``suboffsets[n] >= 0``, the values stored along the nth dimension are
+ pointers and the suboffset value dictates how many bytes to add to each
+ pointer after de-referencing. A suboffset value that is negative
+ indicates that no de-referencing should occur (striding in a contiguous
+ memory block).
- This is a storage for the itemsize (in bytes) of each element of the
- shared memory. It is technically un-necessary as it can be obtained
- using :c:func:`PyBuffer_SizeFromFormat`, however an exporter may know
- this information without parsing the format string and it is necessary
- to know the itemsize for proper interpretation of striding. Therefore,
- storing it is more convenient and faster.
+ This type of array representation is used by the Python Imaging Library
+ (PIL). See `complex arrays`_ for further information on how to access elements
+ of such an array.
- .. c:member:: void *internal
+ The suboffsets array is read-only for the consumer.
+
+ .. c:member:: void \*internal
This is for use internally by the exporting object. For example, this
might be re-cast as an integer by the exporter and used to store flags
about whether or not the shape, strides, and suboffsets arrays must be
- freed when the buffer is released. The consumer should never alter this
+ freed when the buffer is released. The consumer MUST NOT alter this
value.
+.. _buffer-request-types:
-Buffer-related functions
-========================
+Buffer request types
+====================
+Buffers are usually obtained by sending a buffer request to an exporting
+object via :c:func:`PyObject_GetBuffer`. Since the complexity of the logical
+structure of the memory can vary drastically, the consumer uses the *flags*
+argument to specify the exact buffer type it can handle.
-.. c:function:: int PyObject_CheckBuffer(PyObject *obj)
+All :c:data:`Py_buffer` fields are unambiguously defined by the request
+type.
+
+request-independent fields
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+The following fields are not influenced by *flags* and must always be filled in
+with the correct values: :c:member:`~Py_buffer.obj`, :c:member:`~Py_buffer.buf`,
+:c:member:`~Py_buffer.len`, :c:member:`~Py_buffer.itemsize`, :c:member:`~Py_buffer.ndim`.
- Return 1 if *obj* supports the buffer interface otherwise 0. When 1 is
- returned, it doesn't guarantee that :c:func:`PyObject_GetBuffer` will
- succeed.
+readonly, format
+~~~~~~~~~~~~~~~~
-.. c:function:: int PyObject_GetBuffer(PyObject *obj, Py_buffer *view, int flags)
+ .. c:macro:: PyBUF_WRITABLE
- Export a view over some internal data from the target object *obj*.
- *obj* must not be NULL, and *view* must point to an existing
- :c:type:`Py_buffer` structure allocated by the caller (most uses of
- this function will simply declare a local variable of type
- :c:type:`Py_buffer`). The *flags* argument is a bit field indicating
- what kind of buffer is requested. The buffer interface allows
- for complicated memory layout possibilities; however, some callers
- won't want to handle all the complexity and instead request a simple
- view of the target object (using :c:macro:`PyBUF_SIMPLE` for a read-only
- view and :c:macro:`PyBUF_WRITABLE` for a read-write view).
+ Controls the :c:member:`~Py_buffer.readonly` field. If set, the exporter
+ MUST provide a writable buffer or else report failure. Otherwise, the
+ exporter MAY provide either a read-only or writable buffer, but the choice
+ MUST be consistent for all consumers.
- Some exporters may not be able to share memory in every possible way and
- may need to raise errors to signal to some consumers that something is
- just not possible. These errors should be a :exc:`BufferError` unless
- there is another error that is actually causing the problem. The
- exporter can use flags information to simplify how much of the
- :c:data:`Py_buffer` structure is filled in with non-default values and/or
- raise an error if the object can't support a simpler view of its memory.
+ .. c:macro:: PyBUF_FORMAT
- On success, 0 is returned and the *view* structure is filled with useful
- values. On error, -1 is returned and an exception is raised; the *view*
- is left in an undefined state.
+ Controls the :c:member:`~Py_buffer.format` field. If set, this field MUST
+ be filled in correctly. Otherwise, this field MUST be *NULL*.
- The following are the possible values to the *flags* arguments.
- .. c:macro:: PyBUF_SIMPLE
+:c:macro:`PyBUF_WRITABLE` can be \|'d to any of the flags in the next section.
+Since :c:macro:`PyBUF_SIMPLE` is defined as 0, :c:macro:`PyBUF_WRITABLE`
+can be used as a stand-alone flag to request a simple writable buffer.
- This is the default flag. The returned buffer exposes a read-only
- memory area. The format of data is assumed to be raw unsigned bytes,
- without any particular structure. This is a "stand-alone" flag
- constant. It never needs to be '|'d to the others. The exporter will
- raise an error if it cannot provide such a contiguous buffer of bytes.
+:c:macro:`PyBUF_FORMAT` can be \|'d to any of the flags except :c:macro:`PyBUF_SIMPLE`.
+The latter already implies format ``B`` (unsigned bytes).
- .. c:macro:: PyBUF_WRITABLE
- Like :c:macro:`PyBUF_SIMPLE`, but the returned buffer is writable. If
- the exporter doesn't support writable buffers, an error is raised.
+shape, strides, suboffsets
+~~~~~~~~~~~~~~~~~~~~~~~~~~
- .. c:macro:: PyBUF_STRIDES
+The flags that control the logical structure of the memory are listed
+in decreasing order of complexity. Note that each flag contains all bits
+of the flags below it.
- This implies :c:macro:`PyBUF_ND`. The returned buffer must provide
- strides information (i.e. the strides cannot be NULL). This would be
- used when the consumer can handle strided, discontiguous arrays.
- Handling strides automatically assumes you can handle shape. The
- exporter can raise an error if a strided representation of the data is
- not possible (i.e. without the suboffsets).
- .. c:macro:: PyBUF_ND
++-----------------------------+-------+---------+------------+
+| Request | shape | strides | suboffsets |
++=============================+=======+=========+============+
+| .. c:macro:: PyBUF_INDIRECT | yes | yes | if needed |
++-----------------------------+-------+---------+------------+
+| .. c:macro:: PyBUF_STRIDES | yes | yes | NULL |
++-----------------------------+-------+---------+------------+
+| .. c:macro:: PyBUF_ND | yes | NULL | NULL |
++-----------------------------+-------+---------+------------+
+| .. c:macro:: PyBUF_SIMPLE | NULL | NULL | NULL |
++-----------------------------+-------+---------+------------+
- The returned buffer must provide shape information. The memory will be
- assumed C-style contiguous (last dimension varies the fastest). The
- exporter may raise an error if it cannot provide this kind of
- contiguous buffer. If this is not given then shape will be *NULL*.
- .. c:macro:: PyBUF_C_CONTIGUOUS
- PyBUF_F_CONTIGUOUS
- PyBUF_ANY_CONTIGUOUS
+contiguity requests
+~~~~~~~~~~~~~~~~~~~
- These flags indicate that the contiguity returned buffer must be
- respectively, C-contiguous (last dimension varies the fastest), Fortran
- contiguous (first dimension varies the fastest) or either one. All of
- these flags imply :c:macro:`PyBUF_STRIDES` and guarantee that the
- strides buffer info structure will be filled in correctly.
+C or Fortran contiguity can be explicitly requested, with and without stride
+information. Without stride information, the buffer must be C-contiguous.
- .. c:macro:: PyBUF_INDIRECT
++-----------------------------------+-------+---------+------------+--------+
+| Request | shape | strides | suboffsets | contig |
++===================================+=======+=========+============+========+
+| .. c:macro:: PyBUF_C_CONTIGUOUS | yes | yes | NULL | C |
++-----------------------------------+-------+---------+------------+--------+
+| .. c:macro:: PyBUF_F_CONTIGUOUS | yes | yes | NULL | F |
++-----------------------------------+-------+---------+------------+--------+
+| .. c:macro:: PyBUF_ANY_CONTIGUOUS | yes | yes | NULL | C or F |
++-----------------------------------+-------+---------+------------+--------+
+| .. c:macro:: PyBUF_ND | yes | NULL | NULL | C |
++-----------------------------------+-------+---------+------------+--------+
- This flag indicates the returned buffer must have suboffsets
- information (which can be NULL if no suboffsets are needed). This can
- be used when the consumer can handle indirect array referencing implied
- by these suboffsets. This implies :c:macro:`PyBUF_STRIDES`.
- .. c:macro:: PyBUF_FORMAT
+compound requests
+~~~~~~~~~~~~~~~~~
- The returned buffer must have true format information if this flag is
- provided. This would be used when the consumer is going to be checking
- for what 'kind' of data is actually stored. An exporter should always
- be able to provide this information if requested. If format is not
- explicitly requested then the format must be returned as *NULL* (which
- means ``'B'``, or unsigned bytes).
+All possible requests are fully defined by some combination of the flags in
+the previous section. For convenience, the buffer protocol provides frequently
+used combinations as single flags.
- .. c:macro:: PyBUF_STRIDED
+In the following table *U* stands for undefined contiguity. The consumer would
+have to call :c:func:`PyBuffer_IsContiguous` to determine contiguity.
- This is equivalent to ``(PyBUF_STRIDES | PyBUF_WRITABLE)``.
- .. c:macro:: PyBUF_STRIDED_RO
- This is equivalent to ``(PyBUF_STRIDES)``.
++-------------------------------+-------+---------+------------+--------+----------+--------+
+| Request | shape | strides | suboffsets | contig | readonly | format |
++===============================+=======+=========+============+========+==========+========+
+| .. c:macro:: PyBUF_FULL | yes | yes | if needed | U | 0 | yes |
++-------------------------------+-------+---------+------------+--------+----------+--------+
+| .. c:macro:: PyBUF_FULL_RO | yes | yes | if needed | U | 1 or 0 | yes |
++-------------------------------+-------+---------+------------+--------+----------+--------+
+| .. c:macro:: PyBUF_RECORDS | yes | yes | NULL | U | 0 | yes |
++-------------------------------+-------+---------+------------+--------+----------+--------+
+| .. c:macro:: PyBUF_RECORDS_RO | yes | yes | NULL | U | 1 or 0 | yes |
++-------------------------------+-------+---------+------------+--------+----------+--------+
+| .. c:macro:: PyBUF_STRIDED | yes | yes | NULL | U | 0 | NULL |
++-------------------------------+-------+---------+------------+--------+----------+--------+
+| .. c:macro:: PyBUF_STRIDED_RO | yes | yes | NULL | U | 1 or 0 | NULL |
++-------------------------------+-------+---------+------------+--------+----------+--------+
+| .. c:macro:: PyBUF_CONTIG | yes | NULL | NULL | C | 0 | NULL |
++-------------------------------+-------+---------+------------+--------+----------+--------+
+| .. c:macro:: PyBUF_CONTIG_RO | yes | NULL | NULL | C | 1 or 0 | NULL |
++-------------------------------+-------+---------+------------+--------+----------+--------+
- .. c:macro:: PyBUF_RECORDS
- This is equivalent to ``(PyBUF_STRIDES | PyBUF_FORMAT |
- PyBUF_WRITABLE)``.
+Complex arrays
+==============
- .. c:macro:: PyBUF_RECORDS_RO
+NumPy-style: shape and strides
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The logical structure of NumPy-style arrays is defined by :c:member:`~Py_buffer.itemsize`,
+:c:member:`~Py_buffer.ndim`, :c:member:`~Py_buffer.shape` and :c:member:`~Py_buffer.strides`.
+
+If ``ndim == 0``, the memory location pointed to by :c:member:`~Py_buffer.buf` is
+interpreted as a scalar of size :c:member:`~Py_buffer.itemsize`. In that case,
+both :c:member:`~Py_buffer.shape` and :c:member:`~Py_buffer.strides` are *NULL*.
+
+If :c:member:`~Py_buffer.strides` is *NULL*, the array is interpreted as
+a standard n-dimensional C-array. Otherwise, the consumer must access an
+n-dimensional array as follows:
+
+ ``ptr = (char *)buf + indices[0] * strides[0] + ... + indices[n-1] * strides[n-1]``
+ ``item = *((typeof(item) *)ptr);``
+
+
+As noted above, :c:member:`~Py_buffer.buf` can point to any location within
+the actual memory block. An exporter can check the validity of a buffer with
+this function:
+
+.. code-block:: python
+
+ def verify_structure(memlen, itemsize, ndim, shape, strides, offset):
+ """Verify that the parameters represent a valid array within
+ the bounds of the allocated memory:
+ char *mem: start of the physical memory block
+ memlen: length of the physical memory block
+ offset: (char *)buf - mem
+ """
+ if offset % itemsize:
+ return False
+ if offset < 0 or offset+itemsize > memlen:
+ return False
+ if any(v % itemsize for v in strides):
+ return False
+
+ if ndim <= 0:
+ return ndim == 0 and not shape and not strides
+ if 0 in shape:
+ return True
+
+ imin = sum(strides[j]*(shape[j]-1) for j in range(ndim)
+ if strides[j] <= 0)
+ imax = sum(strides[j]*(shape[j]-1) for j in range(ndim)
+ if strides[j] > 0)
+
+ return 0 <= offset+imin and offset+imax+itemsize <= memlen
+
+
+PIL-style: shape, strides and suboffsets
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In addition to the regular items, PIL-style arrays can contain pointers
+that must be followed in order to get to the next element in a dimension.
+For example, the regular three-dimensional C-array ``char v[2][2][3]`` can
+also be viewed as an array of 2 pointers to 2 two-dimensional arrays:
+``char (*v[2])[2][3]``. In suboffsets representation, those two pointers
+can be embedded at the start of :c:member:`~Py_buffer.buf`, pointing
+to two ``char x[2][3]`` arrays that can be located anywhere in memory.
+
+
+Here is a function that returns a pointer to the element in an N-D array
+pointed to by an N-dimensional index when there are both non-NULL strides
+and suboffsets::
+
+ void *get_item_pointer(int ndim, void *buf, Py_ssize_t *strides,
+ Py_ssize_t *suboffsets, Py_ssize_t *indices) {
+ char *pointer = (char*)buf;
+ int i;
+ for (i = 0; i < ndim; i++) {
+ pointer += strides[i] * indices[i];
+ if (suboffsets[i] >=0 ) {
+ pointer = *((char**)pointer) + suboffsets[i];
+ }
+ }
+ return (void*)pointer;
+ }
- This is equivalent to ``(PyBUF_STRIDES | PyBUF_FORMAT)``.
- .. c:macro:: PyBUF_FULL
+Buffer-related functions
+========================
- This is equivalent to ``(PyBUF_INDIRECT | PyBUF_FORMAT |
- PyBUF_WRITABLE)``.
+.. c:function:: int PyObject_CheckBuffer(PyObject *obj)
- .. c:macro:: PyBUF_FULL_RO
+ Return 1 if *obj* supports the buffer interface otherwise 0. When 1 is
+ returned, it doesn't guarantee that :c:func:`PyObject_GetBuffer` will
+ succeed.
- This is equivalent to ``(PyBUF_INDIRECT | PyBUF_FORMAT)``.
- .. c:macro:: PyBUF_CONTIG
+.. c:function:: int PyObject_GetBuffer(PyObject *exporter, Py_buffer *view, int flags)
- This is equivalent to ``(PyBUF_ND | PyBUF_WRITABLE)``.
+ Send a request to *exporter* to fill in *view* as specified by *flags*.
+ If the exporter cannot provide a buffer of the exact type, it MUST raise
+ :c:data:`PyExc_BufferError`, set :c:member:`view->obj` to *NULL* and
+ return -1.
- .. c:macro:: PyBUF_CONTIG_RO
+ On success, fill in *view*, set :c:member:`view->obj` to a new reference
+ to *exporter* and return 0. In the case of chained buffer providers
+ that redirect requests to a single object, :c:member:`view->obj` MAY
+ refer to this object instead of *exporter* (See :ref:`Buffer Object Structures <buffer-structs>`).
- This is equivalent to ``(PyBUF_ND)``.
+ Successful calls to :c:func:`PyObject_GetBuffer` must be paired with calls
+ to :c:func:`PyBuffer_Release`, similar to :c:func:`malloc` and :c:func:`free`.
+ Thus, after the consumer is done with the buffer, :c:func:`PyBuffer_Release`
+ must be called exactly once.
.. c:function:: void PyBuffer_Release(Py_buffer *view)
- Release the buffer *view*. This should be called when the buffer is no
- longer being used as it may free memory from it.
+ Release the buffer *view* and decrement the reference count for
+ :c:member:`view->obj`. This function MUST be called when the buffer
+ is no longer being used, otherwise reference leaks may occur.
+
+ It is an error to call this function on a buffer that was not obtained via
+ :c:func:`PyObject_GetBuffer`.
.. c:function:: Py_ssize_t PyBuffer_SizeFromFormat(const char *)
- Return the implied :c:data:`~Py_buffer.itemsize` from the struct-stype
- :c:data:`~Py_buffer.format`.
+ Return the implied :c:data:`~Py_buffer.itemsize` from :c:data:`~Py_buffer.format`.
+ This function is not yet implemented.
-.. c:function:: int PyBuffer_IsContiguous(Py_buffer *view, char fortran)
+.. c:function:: int PyBuffer_IsContiguous(Py_buffer *view, char order)
- Return 1 if the memory defined by the *view* is C-style (*fortran* is
- ``'C'``) or Fortran-style (*fortran* is ``'F'``) contiguous or either one
- (*fortran* is ``'A'``). Return 0 otherwise.
+ Return 1 if the memory defined by the *view* is C-style (*order* is
+ ``'C'``) or Fortran-style (*order* is ``'F'``) contiguous or either one
+ (*order* is ``'A'``). Return 0 otherwise.
-.. c:function:: void PyBuffer_FillContiguousStrides(int ndim, Py_ssize_t *shape, Py_ssize_t *strides, Py_ssize_t itemsize, char fortran)
+.. c:function:: void PyBuffer_FillContiguousStrides(int ndim, Py_ssize_t *shape, Py_ssize_t *strides, Py_ssize_t itemsize, char order)
Fill the *strides* array with byte-strides of a contiguous (C-style if
- *fortran* is ``'C'`` or Fortran-style if *fortran* is ``'F'``) array of the
+ *order* is ``'C'`` or Fortran-style if *order* is ``'F'``) array of the
given shape with the given number of bytes per element.
-.. c:function:: int PyBuffer_FillInfo(Py_buffer *view, PyObject *obj, void *buf, Py_ssize_t len, int readonly, int infoflags)
+.. c:function:: int PyBuffer_FillInfo(Py_buffer *view, PyObject *exporter, void *buf, Py_ssize_t len, int readonly, int flags)
+
+ Handle buffer requests for an exporter that wants to expose *buf* of size *len*
+ with writability set according to *readonly*. *buf* is interpreted as a sequence
+ of unsigned bytes.
+
+ The *flags* argument indicates the request type. This function always fills in
+ *view* as specified by flags, unless *buf* has been designated as read-only
+ and :c:macro:`PyBUF_WRITABLE` is set in *flags*.
+
+ On success, set :c:member:`view->obj` to a new reference to *exporter* and
+ return 0. Otherwise, raise :c:data:`PyExc_BufferError`, set
+ :c:member:`view->obj` to *NULL* and return -1.
+
+ If this function is used as part of a :ref:`getbufferproc <buffer-structs>`,
+ *exporter* MUST be set to the exporting object. Otherwise, *exporter* MUST
+ be NULL.
+
- Fill in a buffer-info structure, *view*, correctly for an exporter that can
- only share a contiguous chunk of memory of "unsigned bytes" of the given
- length. Return 0 on success and -1 (with raising an error) on error.
diff --git a/Doc/c-api/code.rst b/Doc/c-api/code.rst
index 6932bb1..57e8072 100644
--- a/Doc/c-api/code.rst
+++ b/Doc/c-api/code.rst
@@ -31,11 +31,11 @@ bound into a function.
Return true if *co* is a :class:`code` object
-.. c:function:: int PyCode_GetNumFree(PyObject *co)
+.. c:function:: int PyCode_GetNumFree(PyCodeObject *co)
Return the number of free variables in *co*.
-.. c:function:: PyCodeObject *PyCode_New(int argcount, int kwonlyargcount, int nlocals, int stacksize, int flags, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *freevars, PyObject *cellvars, PyObject *filename, PyObject *name, int firstlineno, PyObject *lnotab)
+.. c:function:: PyCodeObject* PyCode_New(int argcount, int kwonlyargcount, int nlocals, int stacksize, int flags, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *freevars, PyObject *cellvars, PyObject *filename, PyObject *name, int firstlineno, PyObject *lnotab)
Return a new code object. If you need a dummy code object to
create a frame, use :c:func:`PyCode_NewEmpty` instead. Calling
@@ -43,7 +43,7 @@ bound into a function.
version since the definition of the bytecode changes often.
-.. c:function:: int PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno)
+.. c:function:: PyCodeObject* PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno)
Return a new empty code object with the specified filename,
function name, and first line number. It is illegal to
diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst
index dfc0a3a..9578f98 100644
--- a/Doc/c-api/conversion.rst
+++ b/Doc/c-api/conversion.rst
@@ -119,13 +119,13 @@ The following functions provide locale-independent string to number conversions.
.. versionadded:: 3.1
-.. c:function:: char* PyOS_stricmp(char *s1, char *s2)
+.. c:function:: int PyOS_stricmp(char *s1, char *s2)
Case insensitive comparison of strings. The function works almost
identically to :c:func:`strcmp` except that it ignores the case.
-.. c:function:: char* PyOS_strnicmp(char *s1, char *s2, Py_ssize_t size)
+.. c:function:: int PyOS_strnicmp(char *s1, char *s2, Py_ssize_t size)
Case insensitive comparison of strings. The function works almost
identically to :c:func:`strncmp` except that it ignores the case.
diff --git a/Doc/c-api/datetime.rst b/Doc/c-api/datetime.rst
index fcd1395..39542bd 100644
--- a/Doc/c-api/datetime.rst
+++ b/Doc/c-api/datetime.rst
@@ -170,6 +170,31 @@ and the type is not checked:
Return the microsecond, as an int from 0 through 999999.
+Macros to extract fields from time delta objects. The argument must be an
+instance of :c:data:`PyDateTime_Delta`, including subclasses. The argument must
+not be *NULL*, and the type is not checked:
+
+.. c:function:: int PyDateTime_DELTA_GET_DAYS(PyDateTime_Delta *o)
+
+ Return the number of days, as an int from -999999999 to 999999999.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: int PyDateTime_DELTA_GET_SECONDS(PyDateTime_Delta *o)
+
+ Return the number of seconds, as an int from 0 through 86399.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: int PyDateTime_DELTA_GET_MICROSECONDS(PyDateTime_Delta *o)
+
+ Return the number of microseconds, as an int from 0 through 999999.
+
+ .. versionadded:: 3.3
+
+
Macros for the convenience of modules implementing the DB API:
.. c:function:: PyObject* PyDateTime_FromTimestamp(PyObject *args)
diff --git a/Doc/c-api/dict.rst b/Doc/c-api/dict.rst
index 6df84e0..6bacc32 100644
--- a/Doc/c-api/dict.rst
+++ b/Doc/c-api/dict.rst
@@ -36,11 +36,11 @@ Dictionary Objects
Return a new empty dictionary, or *NULL* on failure.
-.. c:function:: PyObject* PyDictProxy_New(PyObject *dict)
+.. c:function:: PyObject* PyDictProxy_New(PyObject *mapping)
- Return a proxy object for a mapping which enforces read-only behavior.
- This is normally used to create a proxy to prevent modification of the
- dictionary for non-dynamic class types.
+ Return a :class:`types.MappingProxyType` object for a mapping which
+ enforces read-only behavior. This is normally used to create a view to
+ prevent modification of the dictionary for non-dynamic class types.
.. c:function:: void PyDict_Clear(PyObject *p)
@@ -209,3 +209,10 @@ Dictionary Objects
for key, value in seq2:
if override or key not in a:
a[key] = value
+
+
+.. c:function:: int PyDict_ClearFreeList()
+
+ Clear the free list. Return the total number of freed items.
+
+ .. versionadded:: 3.3
diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst
index 6f13c80..9a66b7f 100644
--- a/Doc/c-api/exceptions.rst
+++ b/Doc/c-api/exceptions.rst
@@ -129,6 +129,41 @@ in various ways. There is a separate error indicator for each thread.
exception state.
+.. c:function:: void PyErr_GetExcInfo(PyObject **ptype, PyObject **pvalue, PyObject **ptraceback)
+
+ Retrieve the exception info, as known from ``sys.exc_info()``. This refers
+ to an exception that was already caught, not to an exception that was
+ freshly raised. Returns new references for the three objects, any of which
+ may be *NULL*. Does not modify the exception info state.
+
+ .. note::
+
+ This function is not normally used by code that wants to handle exceptions.
+ Rather, it can be used when code needs to save and restore the exception
+ state temporarily. Use :c:func:`PyErr_SetExcInfo` to restore or clear the
+ exception state.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: void PyErr_SetExcInfo(PyObject *type, PyObject *value, PyObject *traceback)
+
+ Set the exception info, as known from ``sys.exc_info()``. This refers
+ to an exception that was already caught, not to an exception that was
+ freshly raised. This function steals the references of the arguments.
+ To clear the exception state, pass *NULL* for all three arguments.
+ For general rules about the three arguments, see :c:func:`PyErr_Restore`.
+
+ .. note::
+
+ This function is not normally used by code that wants to handle exceptions.
+ Rather, it can be used when code needs to save and restore the exception
+ state temporarily. Use :c:func:`PyErr_GetExcInfo` to read the exception
+ state.
+
+ .. versionadded:: 3.3
+
+
.. c:function:: void PyErr_SetString(PyObject *type, const char *message)
This is the most common way to set the error indicator. The first argument
@@ -230,6 +265,16 @@ in various ways. There is a separate error indicator for each thread.
parameter specifying the exception type to be raised. Availability: Windows.
+.. c:function:: PyObject* PyErr_SetImportError(PyObject *msg, PyObject *name, PyObject *path)
+
+ This is a convenience function to raise :exc:`ImportError`. *msg* will be
+ set as the exception's message string. *name* and *path*, both of which can
+ be ``NULL``, will be set as the :exc:`ImportError`'s respective ``name``
+ and ``path`` attributes.
+
+ .. versionadded:: 3.3
+
+
.. c:function:: void PyErr_SyntaxLocationEx(char *filename, int lineno, int col_offset)
Set file, line, and offset information for the current exception. If the
@@ -238,7 +283,7 @@ in various ways. There is a separate error indicator for each thread.
is a :exc:`SyntaxError`. *filename* is decoded from the filesystem encoding
(:func:`sys.getfilesystemencoding`).
-.. versionadded:: 3.2
+ .. versionadded:: 3.2
.. c:function:: void PyErr_SyntaxLocation(char *filename, int lineno)
@@ -311,6 +356,7 @@ in various ways. There is a separate error indicator for each thread.
.. versionadded:: 3.2
+
.. c:function:: int PyErr_CheckSignals()
.. index::
@@ -421,17 +467,18 @@ Exception Objects
.. c:function:: PyObject* PyException_GetCause(PyObject *ex)
- Return the cause (another exception instance set by ``raise ... from ...``)
- associated with the exception as a new reference, as accessible from Python
- through :attr:`__cause__`. If there is no cause associated, this returns
- *NULL*.
+ Return the cause (either an exception instance, or :const:`None`,
+ set by ``raise ... from ...``) associated with the exception as a new
+ reference, as accessible from Python through :attr:`__cause__`.
.. c:function:: void PyException_SetCause(PyObject *ex, PyObject *ctx)
Set the cause associated with the exception to *ctx*. Use *NULL* to clear
- it. There is no type check to make sure that *ctx* is an exception instance.
- This steals a reference to *ctx*.
+ it. There is no type check to make sure that *ctx* is either an exception
+ instance or :const:`None`. This steals a reference to *ctx*.
+
+ :attr:`__suppress_context__` is implicitly set to ``True`` by this function.
.. _unicodeexceptions:
@@ -525,7 +572,7 @@ recursion depth automatically).
Marks a point where a recursive C-level call is about to be performed.
- If :const:`USE_STACKCHECK` is defined, this function checks if the the OS
+ If :const:`USE_STACKCHECK` is defined, this function checks if the OS
stack overflowed using :c:func:`PyOS_CheckStack`. If this is the case, it
sets a :exc:`MemoryError` and returns a nonzero value.
@@ -582,65 +629,116 @@ All standard Python exceptions are available as global variables whose names are
:c:type:`PyObject\*`; they are all class objects. For completeness, here are all
the variables:
-+-------------------------------------+----------------------------+----------+
-| C Name | Python Name | Notes |
-+=====================================+============================+==========+
-| :c:data:`PyExc_BaseException` | :exc:`BaseException` | \(1) |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_Exception` | :exc:`Exception` | \(1) |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_ArithmeticError` | :exc:`ArithmeticError` | \(1) |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_LookupError` | :exc:`LookupError` | \(1) |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_AssertionError` | :exc:`AssertionError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_AttributeError` | :exc:`AttributeError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_EOFError` | :exc:`EOFError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_EnvironmentError` | :exc:`EnvironmentError` | \(1) |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_FloatingPointError` | :exc:`FloatingPointError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_IOError` | :exc:`IOError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_ImportError` | :exc:`ImportError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_IndexError` | :exc:`IndexError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_KeyError` | :exc:`KeyError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_KeyboardInterrupt` | :exc:`KeyboardInterrupt` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_MemoryError` | :exc:`MemoryError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_NameError` | :exc:`NameError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_NotImplementedError` | :exc:`NotImplementedError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_OSError` | :exc:`OSError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_OverflowError` | :exc:`OverflowError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_ReferenceError` | :exc:`ReferenceError` | \(2) |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_RuntimeError` | :exc:`RuntimeError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_SyntaxError` | :exc:`SyntaxError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_SystemError` | :exc:`SystemError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_SystemExit` | :exc:`SystemExit` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_TypeError` | :exc:`TypeError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_ValueError` | :exc:`ValueError` | |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_WindowsError` | :exc:`WindowsError` | \(3) |
-+-------------------------------------+----------------------------+----------+
-| :c:data:`PyExc_ZeroDivisionError` | :exc:`ZeroDivisionError` | |
-+-------------------------------------+----------------------------+----------+
++-----------------------------------------+---------------------------------+----------+
+| C Name | Python Name | Notes |
++=========================================+=================================+==========+
+| :c:data:`PyExc_BaseException` | :exc:`BaseException` | \(1) |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_Exception` | :exc:`Exception` | \(1) |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_ArithmeticError` | :exc:`ArithmeticError` | \(1) |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_LookupError` | :exc:`LookupError` | \(1) |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_AssertionError` | :exc:`AssertionError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_AttributeError` | :exc:`AttributeError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_BlockingIOError` | :exc:`BlockingIOError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_BrokenPipeError` | :exc:`BrokenPipeError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_ChildProcessError` | :exc:`ChildProcessError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_ConnectionError` | :exc:`ConnectionError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_ConnectionAbortedError` | :exc:`ConnectionAbortedError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_ConnectionRefusedError` | :exc:`ConnectionRefusedError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_ConnectionResetError` | :exc:`ConnectionResetError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_FileExistsError` | :exc:`FileExistsError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_FileNotFoundError` | :exc:`FileNotFoundError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_EOFError` | :exc:`EOFError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_FloatingPointError` | :exc:`FloatingPointError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_ImportError` | :exc:`ImportError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_IndexError` | :exc:`IndexError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_InterruptedError` | :exc:`InterruptedError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_IsADirectoryError` | :exc:`IsADirectoryError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_KeyError` | :exc:`KeyError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_KeyboardInterrupt` | :exc:`KeyboardInterrupt` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_MemoryError` | :exc:`MemoryError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_NameError` | :exc:`NameError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_NotADirectoryError` | :exc:`NotADirectoryError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_NotImplementedError` | :exc:`NotImplementedError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_OSError` | :exc:`OSError` | \(1) |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_OverflowError` | :exc:`OverflowError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_PermissionError` | :exc:`PermissionError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_ProcessLookupError` | :exc:`ProcessLookupError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_ReferenceError` | :exc:`ReferenceError` | \(2) |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_RuntimeError` | :exc:`RuntimeError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_SyntaxError` | :exc:`SyntaxError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_SystemError` | :exc:`SystemError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_TimeoutError` | :exc:`TimeoutError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_SystemExit` | :exc:`SystemExit` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_TypeError` | :exc:`TypeError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_ValueError` | :exc:`ValueError` | |
++-----------------------------------------+---------------------------------+----------+
+| :c:data:`PyExc_ZeroDivisionError` | :exc:`ZeroDivisionError` | |
++-----------------------------------------+---------------------------------+----------+
+
+.. versionadded:: 3.3
+ :c:data:`PyExc_BlockingIOError`, :c:data:`PyExc_BrokenPipeError`,
+ :c:data:`PyExc_ChildProcessError`, :c:data:`PyExc_ConnectionError`,
+ :c:data:`PyExc_ConnectionAbortedError`, :c:data:`PyExc_ConnectionRefusedError`,
+ :c:data:`PyExc_ConnectionResetError`, :c:data:`PyExc_FileExistsError`,
+ :c:data:`PyExc_FileNotFoundError`, :c:data:`PyExc_InterruptedError`,
+ :c:data:`PyExc_IsADirectoryError`, :c:data:`PyExc_NotADirectoryError`,
+ :c:data:`PyExc_PermissionError`, :c:data:`PyExc_ProcessLookupError`
+ and :c:data:`PyExc_TimeoutError` were introduced following :pep:`3151`.
+
+
+These are compatibility aliases to :c:data:`PyExc_OSError`:
+
++-------------------------------------+----------+
+| C Name | Notes |
++=====================================+==========+
+| :c:data:`PyExc_EnvironmentError` | |
++-------------------------------------+----------+
+| :c:data:`PyExc_IOError` | |
++-------------------------------------+----------+
+| :c:data:`PyExc_WindowsError` | \(3) |
++-------------------------------------+----------+
+
+.. versionchanged:: 3.3
+ These aliases used to be separate exception types.
+
.. index::
single: PyExc_BaseException
@@ -649,28 +747,42 @@ the variables:
single: PyExc_LookupError
single: PyExc_AssertionError
single: PyExc_AttributeError
+ single: PyExc_BlockingIOError
+ single: PyExc_BrokenPipeError
+ single: PyExc_ConnectionError
+ single: PyExc_ConnectionAbortedError
+ single: PyExc_ConnectionRefusedError
+ single: PyExc_ConnectionResetError
single: PyExc_EOFError
- single: PyExc_EnvironmentError
+ single: PyExc_FileExistsError
+ single: PyExc_FileNotFoundError
single: PyExc_FloatingPointError
- single: PyExc_IOError
single: PyExc_ImportError
single: PyExc_IndexError
+ single: PyExc_InterruptedError
+ single: PyExc_IsADirectoryError
single: PyExc_KeyError
single: PyExc_KeyboardInterrupt
single: PyExc_MemoryError
single: PyExc_NameError
+ single: PyExc_NotADirectoryError
single: PyExc_NotImplementedError
single: PyExc_OSError
single: PyExc_OverflowError
+ single: PyExc_PermissionError
+ single: PyExc_ProcessLookupError
single: PyExc_ReferenceError
single: PyExc_RuntimeError
single: PyExc_SyntaxError
single: PyExc_SystemError
single: PyExc_SystemExit
+ single: PyExc_TimeoutError
single: PyExc_TypeError
single: PyExc_ValueError
- single: PyExc_WindowsError
single: PyExc_ZeroDivisionError
+ single: PyExc_EnvironmentError
+ single: PyExc_IOError
+ single: PyExc_WindowsError
Notes:
diff --git a/Doc/c-api/function.rst b/Doc/c-api/function.rst
index 31805fd..ad98322 100644
--- a/Doc/c-api/function.rst
+++ b/Doc/c-api/function.rst
@@ -38,6 +38,16 @@ There are a few functions specific to Python functions.
object, the argument defaults and closure are set to *NULL*.
+.. c:function:: PyObject* PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname)
+
+ As :c:func:`PyFunction_New`, but also allows setting the function object's
+ ``__qualname__`` attribute. *qualname* should be a unicode object or NULL;
+ if NULL, the ``__qualname__`` attribute is set to the same value as its
+ ``__name__`` attribute.
+
+ .. versionadded:: 3.3
+
+
.. c:function:: PyObject* PyFunction_GetCode(PyObject *op)
Return the code object associated with the function object *op*.
diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst
index cf48363..270152e 100644
--- a/Doc/c-api/import.rst
+++ b/Doc/c-api/import.rst
@@ -30,13 +30,13 @@ Importing Modules
.. c:function:: PyObject* PyImport_ImportModuleNoBlock(const char *name)
- This version of :c:func:`PyImport_ImportModule` does not block. It's intended
- to be used in C functions that import other modules to execute a function.
- The import may block if another thread holds the import lock. The function
- :c:func:`PyImport_ImportModuleNoBlock` never blocks. It first tries to fetch
- the module from sys.modules and falls back to :c:func:`PyImport_ImportModule`
- unless the lock is held, in which case the function will raise an
- :exc:`ImportError`.
+ This function is a deprecated alias of :c:func:`PyImport_ImportModule`.
+
+ .. versionchanged:: 3.3
+ This function used to fail immediately when the import lock was held
+ by another thread. In Python 3.3 though, the locking scheme switched
+ to per-module locks for most purposes, so this function's special
+ behaviour isn't needed anymore.
.. c:function:: PyObject* PyImport_ImportModuleEx(char *name, PyObject *globals, PyObject *locals, PyObject *fromlist)
@@ -44,8 +44,7 @@ Importing Modules
.. index:: builtin: __import__
Import a module. This is best described by referring to the built-in Python
- function :func:`__import__`, as the standard :func:`__import__` function calls
- this function directly.
+ function :func:`__import__`.
The return value is a new reference to the imported module or top-level
package, or *NULL* with an exception set on failure. Like for
@@ -57,7 +56,7 @@ Importing Modules
:c:func:`PyImport_ImportModule`.
-.. c:function:: PyObject* PyImport_ImportModuleLevel(char *name, PyObject *globals, PyObject *locals, PyObject *fromlist, int level)
+.. c:function:: PyObject* PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals, PyObject *locals, PyObject *fromlist, int level)
Import a module. This is best described by referring to the built-in Python
function :func:`__import__`, as the standard :func:`__import__` function calls
@@ -68,6 +67,16 @@ Importing Modules
the return value when a submodule of a package was requested is normally the
top-level package, unless a non-empty *fromlist* was given.
+ .. versionadded:: 3.3
+
+
+.. c:function:: PyObject* PyImport_ImportModuleLevel(char *name, PyObject *globals, PyObject *locals, PyObject *fromlist, int level)
+
+ Similar to :c:func:`PyImport_ImportModuleLevelObject`, but the name is a
+ UTF-8 encoded string instead of a Unicode object.
+
+ .. versionchanged:: 3.3
+ Negative values for *level* are no longer accepted.
.. c:function:: PyObject* PyImport_Import(PyObject *name)
@@ -86,7 +95,7 @@ Importing Modules
an exception set on failure (the module still exists in this case).
-.. c:function:: PyObject* PyImport_AddModule(const char *name)
+.. c:function:: PyObject* PyImport_AddModuleObject(PyObject *name)
Return the module object corresponding to a module name. The *name* argument
may be of the form ``package.module``. First check the modules dictionary if
@@ -100,6 +109,14 @@ Importing Modules
or one of its variants to import a module. Package structures implied by a
dotted name for *name* are not created if not already present.
+ .. versionadded:: 3.3
+
+
+.. c:function:: PyObject* PyImport_AddModule(const char *name)
+
+ Similar to :c:func:`PyImport_AddModuleObject`, but the name is a UTF-8
+ encoded string instead of a Unicode object.
+
.. c:function:: PyObject* PyImport_ExecCodeModule(char *name, PyObject *co)
@@ -136,25 +153,43 @@ Importing Modules
See also :c:func:`PyImport_ExecCodeModuleWithPathnames`.
-.. c:function:: PyObject* PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname, char *cpathname)
+.. c:function:: PyObject* PyImport_ExecCodeModuleObject(PyObject *name, PyObject *co, PyObject *pathname, PyObject *cpathname)
Like :c:func:`PyImport_ExecCodeModuleEx`, but the :attr:`__cached__`
attribute of the module object is set to *cpathname* if it is
non-``NULL``. Of the three functions, this is the preferred one to use.
+ .. versionadded:: 3.3
+
+
+.. c:function:: PyObject* PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname, char *cpathname)
+
+ Like :c:func:`PyImport_ExecCodeModuleObject`, but *name*, *pathname* and
+ *cpathname* are UTF-8 encoded strings. Attempts are also made to figure out
+ what the value for *pathname* should be from *cpathname* if the former is
+ set to ``NULL``.
+
.. versionadded:: 3.2
+ .. versionchanged:: 3.3
+ Uses :func:`imp.source_from_cache` in calculating the source path if
+ only the bytecode path is provided.
+
.. c:function:: long PyImport_GetMagicNumber()
Return the magic number for Python bytecode files (a.k.a. :file:`.pyc` and
:file:`.pyo` files). The magic number should be present in the first four bytes
- of the bytecode file, in little-endian byte order.
+ of the bytecode file, in little-endian byte order. Returns -1 on error.
+
+ .. versionchanged:: 3.3
+ Return value of -1 upon failure.
.. c:function:: const char * PyImport_GetMagicTag()
Return the magic tag string for :pep:`3147` format Python bytecode file
- names.
+ names. Keep in mind that the value at ``sys.implementation.cache_tag`` is
+ authoritative and should be used instead of this function.
.. versionadded:: 3.2
@@ -200,7 +235,7 @@ Importing Modules
For internal use only.
-.. c:function:: int PyImport_ImportFrozenModule(char *name)
+.. c:function:: int PyImport_ImportFrozenModuleObject(PyObject *name)
Load a frozen module named *name*. Return ``1`` for success, ``0`` if the
module is not found, and ``-1`` with an exception set if the initialization
@@ -208,6 +243,14 @@ Importing Modules
:c:func:`PyImport_ImportModule`. (Note the misnomer --- this function would
reload the module if it was already imported.)
+ .. versionadded:: 3.3
+
+
+.. c:function:: int PyImport_ImportFrozenModule(char *name)
+
+ Similar to :c:func:`PyImport_ImportFrozenModuleObject`, but the name is a
+ UTF-8 encoded string instead of a Unicode object.
+
.. c:type:: struct _frozen
@@ -247,13 +290,13 @@ Importing Modules
Structure describing a single entry in the list of built-in modules. Each of
these structures gives the name and initialization function for a module built
- into the interpreter. Programs which embed Python may use an array of these
- structures in conjunction with :c:func:`PyImport_ExtendInittab` to provide
- additional built-in modules. The structure is defined in
- :file:`Include/import.h` as::
+ into the interpreter. The name is an ASCII encoded string. Programs which
+ embed Python may use an array of these structures in conjunction with
+ :c:func:`PyImport_ExtendInittab` to provide additional built-in modules.
+ The structure is defined in :file:`Include/import.h` as::
struct _inittab {
- char *name;
+ char *name; /* ASCII encoded string */
PyObject* (*initfunc)(void);
};
diff --git a/Doc/c-api/index.rst b/Doc/c-api/index.rst
index 12a1ec7..a0cb6e5 100644
--- a/Doc/c-api/index.rst
+++ b/Doc/c-api/index.rst
@@ -25,3 +25,4 @@ document the API functions in detail.
init.rst
memory.rst
objimpl.rst
+ stable.rst
diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst
index 7507e3b..95ff4ee 100644
--- a/Doc/c-api/init.rst
+++ b/Doc/c-api/init.rst
@@ -646,7 +646,7 @@ with sub-interpreters:
:c:func:`PyGILState_Release` on the same thread.
-.. c:function:: PyThreadState PyGILState_GetThisThreadState()
+.. c:function:: PyThreadState* PyGILState_GetThisThreadState()
Get the current thread state for this thread. May return ``NULL`` if no
GILState API has been used on the current thread. Note that the main thread
diff --git a/Doc/c-api/list.rst b/Doc/c-api/list.rst
index feb9015..5b263a7 100644
--- a/Doc/c-api/list.rst
+++ b/Doc/c-api/list.rst
@@ -142,3 +142,10 @@ List Objects
Return a new tuple object containing the contents of *list*; equivalent to
``tuple(list)``.
+
+
+.. c:function:: int PyList_ClearFreeList()
+
+ Clear the free list. Return the total number of freed items.
+
+ .. versionadded:: 3.3
diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst
index e2f58ad..365abd7 100644
--- a/Doc/c-api/long.rst
+++ b/Doc/c-api/long.rst
@@ -100,6 +100,20 @@ All integers are implemented as "long" integer objects of arbitrary size.
string is first encoded to a byte string using :c:func:`PyUnicode_EncodeDecimal`
and then converted using :c:func:`PyLong_FromString`.
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
+ :c:func:`PyLong_FromUnicodeObject`.
+
+
+.. c:function:: PyObject* PyLong_FromUnicodeObject(PyObject *u, int base)
+
+ Convert a sequence of Unicode digits in the string *u* to a Python integer
+ value. The Unicode string is first encoded to a byte string using
+ :c:func:`PyUnicode_EncodeDecimal` and then converted using
+ :c:func:`PyLong_FromString`.
+
+ .. versionadded:: 3.3
+
.. c:function:: PyObject* PyLong_FromVoidPtr(void *p)
diff --git a/Doc/c-api/memoryview.rst b/Doc/c-api/memoryview.rst
index 6b49cdf..5e50977 100644
--- a/Doc/c-api/memoryview.rst
+++ b/Doc/c-api/memoryview.rst
@@ -17,16 +17,21 @@ any other object.
Create a memoryview object from an object that provides the buffer interface.
If *obj* supports writable buffer exports, the memoryview object will be
- readable and writable, otherwise it will be read-only.
+ read/write, otherwise it may be either read-only or read/write at the
+ discretion of the exporter.
+.. c:function:: PyObject *PyMemoryView_FromMemory(char *mem, Py_ssize_t size, int flags)
+
+ Create a memoryview object using *mem* as the underlying buffer.
+ *flags* can be one of :c:macro:`PyBUF_READ` or :c:macro:`PyBUF_WRITE`.
+
+ .. versionadded:: 3.3
.. c:function:: PyObject *PyMemoryView_FromBuffer(Py_buffer *view)
Create a memoryview object wrapping the given buffer structure *view*.
- The memoryview object then owns the buffer represented by *view*, which
- means you shouldn't try to call :c:func:`PyBuffer_Release` yourself: it
- will be done on deallocation of the memoryview object.
-
+ For simple byte buffers, :c:func:`PyMemoryView_FromMemory` is the preferred
+ function.
.. c:function:: PyObject *PyMemoryView_GetContiguous(PyObject *obj, int buffertype, char order)
@@ -43,10 +48,16 @@ any other object.
currently allowed to create subclasses of :class:`memoryview`.
-.. c:function:: Py_buffer *PyMemoryView_GET_BUFFER(PyObject *obj)
+.. c:function:: Py_buffer *PyMemoryView_GET_BUFFER(PyObject *mview)
+
+ Return a pointer to the memoryview's private copy of the exporter's buffer.
+ *mview* **must** be a memoryview instance; this macro doesn't check its type,
+ you must do it yourself or you will risk crashes.
+
+.. c:function:: PyObject *PyMemoryView_GET_BASE(PyObject *mview)
- Return a pointer to the buffer structure wrapped by the given
- memoryview object. The object **must** be a memoryview instance;
- this macro doesn't check its type, you must do it yourself or you
- will risk crashes.
+ Return either a pointer to the exporting object that the memoryview is based
+ on or *NULL* if the memoryview has been created by one of the functions
+ :c:func:`PyMemoryView_FromMemory` or :c:func:`PyMemoryView_FromBuffer`.
+ *mview* **must** be a memoryview instance.
diff --git a/Doc/c-api/module.rst b/Doc/c-api/module.rst
index ffd68e3..3be7fe3 100644
--- a/Doc/c-api/module.rst
+++ b/Doc/c-api/module.rst
@@ -29,7 +29,7 @@ There are only a few functions special to module objects.
:c:data:`PyModule_Type`.
-.. c:function:: PyObject* PyModule_New(const char *name)
+.. c:function:: PyObject* PyModule_NewObject(PyObject *name)
.. index::
single: __name__ (module attribute)
@@ -40,6 +40,14 @@ There are only a few functions special to module objects.
Only the module's :attr:`__doc__` and :attr:`__name__` attributes are filled in;
the caller is responsible for providing a :attr:`__file__` attribute.
+ .. versionadded:: 3.3
+
+
+.. c:function:: PyObject* PyModule_New(const char *name)
+
+ Similar to :c:func:`PyModule_NewObject`, but the name is a UTF-8 encoded
+ string instead of a Unicode object.
+
.. c:function:: PyObject* PyModule_GetDict(PyObject *module)
@@ -52,7 +60,7 @@ There are only a few functions special to module objects.
manipulate a module's :attr:`__dict__`.
-.. c:function:: char* PyModule_GetName(PyObject *module)
+.. c:function:: PyObject* PyModule_GetNameObject(PyObject *module)
.. index::
single: __name__ (module attribute)
@@ -61,15 +69,13 @@ There are only a few functions special to module objects.
Return *module*'s :attr:`__name__` value. If the module does not provide one,
or if it is not a string, :exc:`SystemError` is raised and *NULL* is returned.
+ .. versionadded:: 3.3
-.. c:function:: char* PyModule_GetFilename(PyObject *module)
- Similar to :c:func:`PyModule_GetFilenameObject` but return the filename
- encoded to 'utf-8'.
+.. c:function:: char* PyModule_GetName(PyObject *module)
- .. deprecated:: 3.2
- :c:func:`PyModule_GetFilename` raises :c:type:`UnicodeEncodeError` on
- unencodable filenames, use :c:func:`PyModule_GetFilenameObject` instead.
+ Similar to :c:func:`PyModule_GetNameObject` but return the name encoded to
+ ``'utf-8'``.
.. c:function:: PyObject* PyModule_GetFilenameObject(PyObject *module)
@@ -81,11 +87,21 @@ There are only a few functions special to module objects.
Return the name of the file from which *module* was loaded using *module*'s
:attr:`__file__` attribute. If this is not defined, or if it is not a
unicode string, raise :exc:`SystemError` and return *NULL*; otherwise return
- a reference to a :c:type:`PyUnicodeObject`.
+ a reference to a Unicode object.
.. versionadded:: 3.2
+.. c:function:: char* PyModule_GetFilename(PyObject *module)
+
+ Similar to :c:func:`PyModule_GetFilenameObject` but return the filename
+ encoded to 'utf-8'.
+
+ .. deprecated:: 3.2
+ :c:func:`PyModule_GetFilename` raises :exc:`UnicodeEncodeError` on
+ unencodable filenames, use :c:func:`PyModule_GetFilenameObject` instead.
+
+
.. c:function:: void* PyModule_GetState(PyObject *module)
Return the "state" of the module, that is, a pointer to the block of memory
@@ -97,8 +113,28 @@ There are only a few functions special to module objects.
Return a pointer to the :c:type:`PyModuleDef` struct from which the module was
created, or *NULL* if the module wasn't created with
- :c:func:`PyModule_Create`.
+ :c:func:`PyModule_Create`.
+
+.. c:function:: PyObject* PyState_FindModule(PyModuleDef *def)
+
+ Returns the module object that was created from *def* for the current interpreter.
+ This method requires that the module object has been attached to the interpreter state with
+ :c:func:`PyState_AddModule` beforehand. In case the corresponding module object is not
+ found or has not been attached to the interpreter state yet, it returns NULL.
+
+.. c:function:: int PyState_AddModule(PyModuleDef *def, PyObject *module)
+
+ Attaches the module object passed to the function to the interpreter state. This allows
+ the module object to be accessible via
+ :c:func:`PyState_FindModule`.
+
+ .. versionadded:: 3.3
+
+.. c:function:: int PyState_RemoveModule(PyModuleDef *def)
+
+ Removes the module object created from *def* from the interpreter state.
+ .. versionadded:: 3.3
Initializing C modules
^^^^^^^^^^^^^^^^^^^^^^
diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst
index d0d45ad..d895547 100644
--- a/Doc/c-api/object.rst
+++ b/Doc/c-api/object.rst
@@ -6,6 +6,19 @@ Object Protocol
===============
+.. c:var:: PyObject* Py_NotImplemented
+
+ The ``NotImplemented`` singleton, used to signal that an operation is
+ not implemented for the given type combination.
+
+
+.. c:macro:: Py_RETURN_NOTIMPLEMENTED
+
+ Properly handle returning :c:data:`Py_NotImplemented` from within a C
+ function (that is, increment the reference count of NotImplemented and
+ return it).
+
+
.. c:function:: int PyObject_Print(PyObject *o, FILE *fp, int flags)
Print an object *o*, on file *fp*. Returns ``-1`` on error. The flags argument
@@ -88,6 +101,22 @@ Object Protocol
This is the equivalent of the Python statement ``del o.attr_name``.
+.. c:function:: PyObject* PyType_GenericGetDict(PyObject *o, void *context)
+
+ A generic implementation for the getter of a ``__dict__`` descriptor. It
+ creates the dictionary if necessary.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: int PyType_GenericSetDict(PyObject *o, void *context)
+
+ A generic implementation for the setter of a ``__dict__`` descriptor. This
+ implementation does not allow the dictionary to be deleted.
+
+ .. versionadded:: 3.3
+
+
.. c:function:: PyObject* PyObject_RichCompare(PyObject *o1, PyObject *o2, int opid)
Compare the values of *o1* and *o2* using the operation specified by *opid*,
diff --git a/Doc/c-api/set.rst b/Doc/c-api/set.rst
index 66b47c4..5f0ef90 100644
--- a/Doc/c-api/set.rst
+++ b/Doc/c-api/set.rst
@@ -157,3 +157,10 @@ subtypes but not for instances of :class:`frozenset` or its subtypes.
.. c:function:: int PySet_Clear(PyObject *set)
Empty an existing set of all elements.
+
+
+.. c:function:: int PySet_ClearFreeList()
+
+ Clear the free list. Return the total number of freed items.
+
+ .. versionadded:: 3.3
diff --git a/Doc/c-api/stable.rst b/Doc/c-api/stable.rst
new file mode 100644
index 0000000..2688c1c
--- /dev/null
+++ b/Doc/c-api/stable.rst
@@ -0,0 +1,39 @@
+.. highlightlang:: c
+
+.. _stable:
+
+***********************************
+Stable Application Binary Interface
+***********************************
+
+Traditionally, the C API of Python will change with every release.
+Most changes will be source-compatible, typically by only adding API,
+rather than changing existing API or removing API (although some
+interfaces do get removed after being deprecated first).
+
+Unfortunately, the API compatibility does not extend to binary
+compatibility (the ABI). The reason is primarily the evolution of
+struct definitions, where addition of a new field, or changing
+the type of a field, might not break the API, but can break the ABI.
+As a consequence, extension modules need to be recompiled for
+every Python release (although an exception is possible on Unix
+when none of the affected interfaces are used). In addition, on
+Windows, extension modules link with a specific pythonXY.dll and
+need to be recompiled to link with a newer one.
+
+Since Python 3.2, a subset of the API has been declared to guarantee
+a stable ABI. Extension modules wishing to use this API need to define
+Py_LIMITED_API. A number of interpreter details then become hidden
+from the extension module; in return, a module is built that works
+on any 3.x version (x>=2) without recompilation. In some cases, the
+stable ABI needs to be extended with new functions. Extension modules
+wishing to use these new APIs need to set Py_LIMITED_API to the
+PY_VERSION_HEX value of the minimum Python version they want to
+support (e.g. 0x03030000 for Python 3.3). Such modules will work
+on all subsequent Python releases, but fail to load (because of
+missing symbols) on the older releases.
+
+As of Python 3.2, the set of functions available to the limited API
+is documented in PEP 384.
+
+.. XXX copy exact list here? Into each functions definition?
diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst
index b3386ea..44c731a 100644
--- a/Doc/c-api/type.rst
+++ b/Doc/c-api/type.rst
@@ -51,13 +51,13 @@ Type Objects
modification of the attributes or base classes of the type.
-.. c:function:: int PyType_HasFeature(PyObject *o, int feature)
+.. c:function:: int PyType_HasFeature(PyTypeObject *o, int feature)
Return true if the type object *o* sets the feature *feature*. Type features
are denoted by single bit flags.
-.. c:function:: int PyType_IS_GC(PyObject *o)
+.. c:function:: int PyType_IS_GC(PyTypeObject *o)
Return true if the type object includes support for the cycle detector; this
tests the type flag :const:`Py_TPFLAGS_HAVE_GC`.
@@ -70,13 +70,14 @@ Type Objects
.. c:function:: PyObject* PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems)
- XXX: Document.
-
+ Generic handler for the :attr:`tp_alloc` slot of a type object. Use
+ Python's default memory allocation mechanism to allocate a new instance and
+ initialize all its contents to *NULL*.
.. c:function:: PyObject* PyType_GenericNew(PyTypeObject *type, PyObject *args, PyObject *kwds)
- XXX: Document.
-
+ Generic handler for the :attr:`tp_new` slot of a type object. Create a
+ new instance using the type's :attr:`tp_alloc` slot.
.. c:function:: int PyType_Ready(PyTypeObject *type)
@@ -84,3 +85,15 @@ Type Objects
their initialization. This function is responsible for adding inherited slots
from a type's base class. Return ``0`` on success, or return ``-1`` and sets an
exception on error.
+
+.. c:function:: PyObject* PyType_FromSpec(PyType_Spec *spec)
+
+ Creates and returns a heap type object from the *spec* passed to the function.
+
+.. c:function:: PyObject* PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases)
+
+ Creates and returns a heap type object from the *spec*. In addition to that,
+ the created heap type contains all types contained by the *bases* tuple as base
+ types. This allows the caller to reference other heap types as base types.
+
+ .. versionadded:: 3.3
diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst
index 68ca9ad..ea1a0ad 100644
--- a/Doc/c-api/typeobj.rst
+++ b/Doc/c-api/typeobj.rst
@@ -1198,46 +1198,88 @@ Buffer Object Structures
.. sectionauthor:: Greg J. Stein <greg@lyra.org>
.. sectionauthor:: Benjamin Peterson
+.. sectionauthor:: Stefan Krah
+.. c:type:: PyBufferProcs
-The :ref:`buffer interface <bufferobjects>` exports a model where an object can expose its internal
-data.
+ This structure holds pointers to the functions required by the
+ :ref:`Buffer protocol <bufferobjects>`. The protocol defines how
+ an exporter object can expose its internal data to consumer objects.
-If an object does not export the buffer interface, then its :attr:`tp_as_buffer`
-member in the :c:type:`PyTypeObject` structure should be *NULL*. Otherwise, the
-:attr:`tp_as_buffer` will point to a :c:type:`PyBufferProcs` structure.
+.. c:member:: getbufferproc PyBufferProcs.bf_getbuffer
+ The signature of this function is::
-.. c:type:: PyBufferProcs
+ int (PyObject *exporter, Py_buffer *view, int flags);
+
+ Handle a request to *exporter* to fill in *view* as specified by *flags*.
+ Except for point (3), an implementation of this function MUST take these
+ steps:
+
+ (1) Check if the request can be met. If not, raise :c:data:`PyExc_BufferError`,
+ set :c:data:`view->obj` to *NULL* and return -1.
+
+ (2) Fill in the requested fields.
+
+ (3) Increment an internal counter for the number of exports.
+
+ (4) Set :c:data:`view->obj` to *exporter* and increment the reference count of :c:data:`view->obj`.
+
+ (5) Return 0.
+
+ If *exporter* is part of a chain or tree of buffer providers, two main
+ schemes can be used:
+
+ * Re-export: Each member of the tree acts as the exporting object and
+ sets :c:data:`view->obj` to a new reference to itself.
+
+ * Redirect: The buffer request is redirected to the root object of the
+ tree. Here, :c:data:`view->obj` will be a new reference to the root
+ object.
+
+ The individual fields of *view* are described in section
+ :ref:`Buffer structure <buffer-structure>`, the rules for how an exporter
+ must react to specific requests are in section
+ :ref:`Buffer request types <buffer-request-types>`.
+
+ All memory pointed to in the :c:type:`Py_buffer` structure belongs to
+ the exporter and must remain valid until there are no consumers left.
+ :c:member:`~Py_buffer.format`, :c:member:`~Py_buffer.shape`,
+ :c:member:`~Py_buffer.strides`, :c:member:`~Py_buffer.suboffsets`
+ and :c:member:`~Py_buffer.internal`
+ are read-only for the consumer.
+
+ :c:func:`PyBuffer_FillInfo` provides an easy way of exposing a simple
+ bytes buffer while dealing correctly with all request types.
+
+ :c:func:`PyObject_GetBuffer` is the interface for the consumer that
+ wraps this function.
+
+.. c:member:: releasebufferproc PyBufferProcs.bf_releasebuffer
+
+ The signature of this function is::
+
+ void (PyObject *exporter, Py_buffer *view);
- Structure used to hold the function pointers which define an implementation of
- the buffer protocol.
+ Handle a request to release the resources of the buffer. If no resources
+ need to be released, :c:member:`PyBufferProcs.bf_releasebuffer` may be
+ *NULL*. Otherwise, a standard implementation of this function will take
+ these optional steps:
- .. c:member:: getbufferproc bf_getbuffer
+ (1) Decrement an internal counter for the number of exports.
- This should fill a :c:type:`Py_buffer` with the necessary data for
- exporting the type. The signature of :data:`getbufferproc` is ``int
- (PyObject *obj, Py_buffer *view, int flags)``. *obj* is the object to
- export, *view* is the :c:type:`Py_buffer` struct to fill, and *flags* gives
- the conditions the caller wants the memory under. (See
- :c:func:`PyObject_GetBuffer` for all flags.) :c:member:`bf_getbuffer` is
- responsible for filling *view* with the appropriate information.
- (:c:func:`PyBuffer_FillView` can be used in simple cases.) See
- :c:type:`Py_buffer`\s docs for what needs to be filled in.
+ (2) If the counter is 0, free all memory associated with *view*.
+ The exporter MUST use the :c:member:`~Py_buffer.internal` field to keep
+ track of buffer-specific resources. This field is guaranteed to remain
+ constant, while a consumer MAY pass a copy of the original buffer as the
+ *view* argument.
- .. c:member:: releasebufferproc bf_releasebuffer
- This should release the resources of the buffer. The signature of
- :c:data:`releasebufferproc` is ``void (PyObject *obj, Py_buffer *view)``.
- If the :c:data:`bf_releasebuffer` function is not provided (i.e. it is
- *NULL*), then it does not ever need to be called.
+ This function MUST NOT decrement the reference count of :c:data:`view->obj`, since that is
+ done automatically in :c:func:`PyBuffer_Release` (this scheme is
+ useful for breaking reference cycles).
- The exporter of the buffer interface must make sure that any memory
- pointed to in the :c:type:`Py_buffer` structure remains valid until
- releasebuffer is called. Exporters will need to define a
- :c:data:`bf_releasebuffer` function if they can re-allocate their memory,
- strides, shape, suboffsets, or format variables which they might share
- through the struct bufferinfo.
- See :c:func:`PyBuffer_Release`.
+ :c:func:`PyBuffer_Release` is the interface for the consumer that
+ wraps this function.
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index d1b57d9..2ac51df 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -6,38 +6,72 @@ Unicode Objects and Codecs
--------------------------
.. sectionauthor:: Marc-André Lemburg <mal@lemburg.com>
+.. sectionauthor:: Georg Brandl <georg@python.org>
Unicode Objects
^^^^^^^^^^^^^^^
+Since the implementation of :pep:`393` in Python 3.3, Unicode objects internally
+use a variety of representations, in order to allow handling the complete range
+of Unicode characters while staying memory efficient. There are special cases
+for strings where all code points are below 128, 256, or 65536; otherwise, code
+points must be below 1114112 (which is the full Unicode range).
+
+:c:type:`Py_UNICODE*` and UTF-8 representations are created on demand and cached
+in the Unicode object. The :c:type:`Py_UNICODE*` representation is deprecated
+and inefficient; it should be avoided in performance- or memory-sensitive
+situations.
+
+Due to the transition between the old APIs and the new APIs, unicode objects
+can internally be in two states depending on how they were created:
+
+* "canonical" unicode objects are all objects created by a non-deprecated
+ unicode API. They use the most efficient representation allowed by the
+ implementation.
+
+* "legacy" unicode objects have been created through one of the deprecated
+ APIs (typically :c:func:`PyUnicode_FromUnicode`) and only bear the
+ :c:type:`Py_UNICODE*` representation; you will have to call
+ :c:func:`PyUnicode_READY` on them before calling any other API.
+
+
Unicode Type
""""""""""""
These are the basic Unicode object types used for the Unicode implementation in
Python:
+.. c:type:: Py_UCS4
+ Py_UCS2
+ Py_UCS1
+
+ These types are typedefs for unsigned integer types wide enough to contain
+ characters of 32 bits, 16 bits and 8 bits, respectively. When dealing with
+ single Unicode characters, use :c:type:`Py_UCS4`.
+
+ .. versionadded:: 3.3
+
.. c:type:: Py_UNICODE
- This type represents the storage type which is used by Python internally as
- basis for holding Unicode ordinals. Python's default builds use a 16-bit type
- for :c:type:`Py_UNICODE` and store Unicode values internally as UCS2. It is also
- possible to build a UCS4 version of Python (most recent Linux distributions come
- with UCS4 builds of Python). These builds then use a 32-bit type for
- :c:type:`Py_UNICODE` and store Unicode data internally as UCS4. On platforms
- where :c:type:`wchar_t` is available and compatible with the chosen Python
- Unicode build variant, :c:type:`Py_UNICODE` is a typedef alias for
- :c:type:`wchar_t` to enhance native platform compatibility. On all other
- platforms, :c:type:`Py_UNICODE` is a typedef alias for either :c:type:`unsigned
- short` (UCS2) or :c:type:`unsigned long` (UCS4).
+ This is a typedef of :c:type:`wchar_t`, which is a 16-bit type or 32-bit type
+ depending on the platform.
-Note that UCS2 and UCS4 Python builds are not binary compatible. Please keep
-this in mind when writing extensions or interfaces.
+ .. versionchanged:: 3.3
+ In previous versions, this was a 16-bit type or a 32-bit type depending on
+ whether you selected a "narrow" or "wide" Unicode version of Python at
+ build time.
-.. c:type:: PyUnicodeObject
+.. c:type:: PyASCIIObject
+ PyCompactUnicodeObject
+ PyUnicodeObject
- This subtype of :c:type:`PyObject` represents a Python Unicode object.
+ These subtypes of :c:type:`PyObject` represent a Python Unicode object. In
+ almost all cases, they shouldn't be used directly, since all API functions
+ that deal with Unicode objects take and return :c:type:`PyObject` pointers.
+
+ .. versionadded:: 3.3
.. c:var:: PyTypeObject PyUnicode_Type
@@ -45,10 +79,10 @@ this in mind when writing extensions or interfaces.
This instance of :c:type:`PyTypeObject` represents the Python Unicode type. It
is exposed to Python code as ``str``.
+
The following APIs are really C macros and can be used to do fast checks and to
access internal read-only data of Unicode objects:
-
.. c:function:: int PyUnicode_Check(PyObject *o)
Return true if the object *o* is a Unicode object or an instance of a Unicode
@@ -61,28 +95,106 @@ access internal read-only data of Unicode objects:
subtype.
-.. c:function:: Py_ssize_t PyUnicode_GET_SIZE(PyObject *o)
+.. c:function:: int PyUnicode_READY(PyObject *o)
- Return the size of the object. *o* has to be a :c:type:`PyUnicodeObject` (not
- checked).
+ Ensure the string object *o* is in the "canonical" representation. This is
+ required before using any of the access macros described below.
+ .. XXX expand on when it is not required
-.. c:function:: Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject *o)
+ Returns 0 on success and -1 with an exception set on failure, which in
+ particular happens if memory allocation fails.
- Return the size of the object's internal buffer in bytes. *o* has to be a
- :c:type:`PyUnicodeObject` (not checked).
+ .. versionadded:: 3.3
-.. c:function:: Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *o)
+.. c:function:: Py_ssize_t PyUnicode_GET_LENGTH(PyObject *o)
+
+ Return the length of the Unicode string, in code points. *o* has to be a
+ Unicode object in the "canonical" representation (not checked).
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: Py_UCS1* PyUnicode_1BYTE_DATA(PyObject *o)
+ Py_UCS2* PyUnicode_2BYTE_DATA(PyObject *o)
+ Py_UCS4* PyUnicode_4BYTE_DATA(PyObject *o)
+
+ Return a pointer to the canonical representation cast to UCS1, UCS2 or UCS4
+ integer types for direct character access. No checks are performed if the
+ canonical representation has the correct character size; use
+ :c:func:`PyUnicode_KIND` to select the right macro. Make sure
+ :c:func:`PyUnicode_READY` has been called before accessing this.
+
+ .. versionadded:: 3.3
+
+
+.. c:macro:: PyUnicode_WCHAR_KIND
+ PyUnicode_1BYTE_KIND
+ PyUnicode_2BYTE_KIND
+ PyUnicode_4BYTE_KIND
+
+ Return values of the :c:func:`PyUnicode_KIND` macro.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: int PyUnicode_KIND(PyObject *o)
+
+ Return one of the PyUnicode kind constants (see above) that indicate how many
+ bytes per character this Unicode object uses to store its data. *o* has to
+ be a Unicode object in the "canonical" representation (not checked).
+
+ .. XXX document "0" return value?
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: void* PyUnicode_DATA(PyObject *o)
+
+ Return a void pointer to the raw unicode buffer. *o* has to be a Unicode
+ object in the "canonical" representation (not checked).
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: void PyUnicode_WRITE(int kind, void *data, Py_ssize_t index, \
+ Py_UCS4 value)
+
+ Write into a canonical representation *data* (as obtained with
+ :c:func:`PyUnicode_DATA`). This macro does not do any sanity checks and is
+ intended for usage in loops. The caller should cache the *kind* value and
+ *data* pointer as obtained from other macro calls. *index* is the index in
+ the string (starts at 0) and *value* is the new code point value which should
+ be written to that location.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: Py_UCS4 PyUnicode_READ(int kind, void *data, Py_ssize_t index)
+
+ Read a code point from a canonical representation *data* (as obtained with
+ :c:func:`PyUnicode_DATA`). No checks or ready calls are performed.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: Py_UCS4 PyUnicode_READ_CHAR(PyObject *o, Py_ssize_t index)
+
+ Read a character from a Unicode object *o*, which must be in the "canonical"
+ representation. This is less efficient than :c:func:`PyUnicode_READ` if you
+ do multiple consecutive reads.
+
+ .. versionadded:: 3.3
- Return a pointer to the internal :c:type:`Py_UNICODE` buffer of the object. *o*
- has to be a :c:type:`PyUnicodeObject` (not checked).
+.. c:function:: PyUnicode_MAX_CHAR_VALUE(PyObject *o)
-.. c:function:: const char* PyUnicode_AS_DATA(PyObject *o)
+ Return the maximum code point that is suitable for creating another string
+ based on *o*, which must be in the "canonical" representation. This is
+ always an approximation but more efficient than iterating over the string.
- Return a pointer to the internal buffer of the object. *o* has to be a
- :c:type:`PyUnicodeObject` (not checked).
+ .. versionadded:: 3.3
.. c:function:: int PyUnicode_ClearFreeList()
@@ -90,6 +202,46 @@ access internal read-only data of Unicode objects:
Clear the free list. Return the total number of freed items.
+.. c:function:: Py_ssize_t PyUnicode_GET_SIZE(PyObject *o)
+
+ Return the size of the deprecated :c:type:`Py_UNICODE` representation, in
+ code units (this includes surrogate pairs as 2 units). *o* has to be a
+ Unicode object (not checked).
+
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style Unicode API, please migrate to using
+ :c:func:`PyUnicode_GET_LENGTH`.
+
+
+.. c:function:: Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject *o)
+
+ Return the size of the deprecated :c:type:`Py_UNICODE` representation in
+ bytes. *o* has to be a Unicode object (not checked).
+
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style Unicode API, please migrate to using
+ :c:func:`PyUnicode_GET_LENGTH`.
+
+
+.. c:function:: Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *o)
+ const char* PyUnicode_AS_DATA(PyObject *o)
+
+ Return a pointer to a :c:type:`Py_UNICODE` representation of the object. The
+ ``AS_DATA`` form casts the pointer to :c:type:`const char *`. *o* has to be
+ a Unicode object (not checked).
+
+ .. versionchanged:: 3.3
+ This macro is now inefficient -- because in many cases the
+ :c:type:`Py_UNICODE` representation does not exist and needs to be created
+ -- and can fail (return *NULL* with an exception set). Try to port the
+ code to use the new :c:func:`PyUnicode_nBYTE_DATA` macros or use
+ :c:func:`PyUnicode_WRITE` or :c:func:`PyUnicode_READ`.
+
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style Unicode API, please migrate to using the
+ :c:func:`PyUnicode_nBYTE_DATA` family of macros.
+
+
Unicode Character Properties
""""""""""""""""""""""""""""
@@ -166,16 +318,25 @@ These APIs can be used for fast direct character conversions:
Return the character *ch* converted to lower case.
+ .. deprecated:: 3.3
+ This function uses simple case mappings.
+
.. c:function:: Py_UNICODE Py_UNICODE_TOUPPER(Py_UNICODE ch)
Return the character *ch* converted to upper case.
+ .. deprecated:: 3.3
+ This function uses simple case mappings.
+
.. c:function:: Py_UNICODE Py_UNICODE_TOTITLE(Py_UNICODE ch)
Return the character *ch* converted to title case.
+ .. deprecated:: 3.3
+ This function uses simple case mappings.
+
.. c:function:: int Py_UNICODE_TODECIMAL(Py_UNICODE ch)
@@ -195,31 +356,66 @@ These APIs can be used for fast direct character conversions:
possible. This macro does not raise exceptions.
-Plain Py_UNICODE
-""""""""""""""""
+These APIs can be used to work with surrogates:
+
+.. c:macro:: Py_UNICODE_IS_SURROGATE(ch)
+
+ Check if *ch* is a surrogate (``0xD800 <= ch <= 0xDFFF``).
+
+.. c:macro:: Py_UNICODE_IS_HIGH_SURROGATE(ch)
+
+ Check if *ch* is a high surrogate (``0xD800 <= ch <= 0xDBFF``).
+
+.. c:macro:: Py_UNICODE_IS_LOW_SURROGATE(ch)
+
+ Check if *ch* is a low surrogate (``0xDC00 <= ch <= 0xDFFF``).
+
+.. c:macro:: Py_UNICODE_JOIN_SURROGATES(high, low)
+
+ Join two surrogate characters and return a single Py_UCS4 value.
+ *high* and *low* are respectively the leading and trailing surrogates in a
+ surrogate pair.
+
+
+Creating and accessing Unicode strings
+""""""""""""""""""""""""""""""""""""""
To create Unicode objects and access their basic sequence properties, use these
APIs:
+.. c:function:: PyObject* PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
-.. c:function:: PyObject* PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
+ Create a new Unicode object. *maxchar* should be the true maximum code point
+ to be placed in the string. As an approximation, it can be rounded up to the
+ nearest value in the sequence 127, 255, 65535, 1114111.
- Create a Unicode object from the Py_UNICODE buffer *u* of the given size. *u*
- may be *NULL* which causes the contents to be undefined. It is the user's
- responsibility to fill in the needed data. The buffer is copied into the new
- object. If the buffer is not *NULL*, the return value might be a shared object.
- Therefore, modification of the resulting Unicode object is only allowed when *u*
- is *NULL*.
+ This is the recommended way to allocate a new Unicode object. Objects
+ created using this function are not resizable.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: PyObject* PyUnicode_FromKindAndData(int kind, const void *buffer, \
+ Py_ssize_t size)
+
+ Create a new Unicode object with the given *kind* (possible values are
+ :c:macro:`PyUnicode_1BYTE_KIND` etc., as returned by
+ :c:func:`PyUnicode_KIND`). The *buffer* must point to an array of *size*
+ units of 1, 2 or 4 bytes per character, as given by the kind.
+
+ .. versionadded:: 3.3
.. c:function:: PyObject* PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
- Create a Unicode object from the char buffer *u*. The bytes will be interpreted
- as being UTF-8 encoded. *u* may also be *NULL* which
- causes the contents to be undefined. It is the user's responsibility to fill in
- the needed data. The buffer is copied into the new object. If the buffer is not
- *NULL*, the return value might be a shared object. Therefore, modification of
- the resulting Unicode object is only allowed when *u* is *NULL*.
+ Create a Unicode object from the char buffer *u*. The bytes will be
+ interpreted as being UTF-8 encoded. The buffer is copied into the new
+ object. If the buffer is not *NULL*, the return value might be a shared
+ object, i.e. modification of the data is not allowed.
+
+ If *u* is *NULL*, this function behaves like :c:func:`PyUnicode_FromUnicode`
+ with the buffer set to *NULL*. This usage is deprecated in favor of
+ :c:func:`PyUnicode_New`.
.. c:function:: PyObject *PyUnicode_FromString(const char *u)
@@ -260,18 +456,27 @@ APIs:
| :attr:`%ld` | long | Exactly equivalent to |
| | | ``printf("%ld")``. |
+-------------------+---------------------+--------------------------------+
+ | :attr:`%li` | long | Exactly equivalent to |
+ | | | ``printf("%li")``. |
+ +-------------------+---------------------+--------------------------------+
| :attr:`%lu` | unsigned long | Exactly equivalent to |
| | | ``printf("%lu")``. |
+-------------------+---------------------+--------------------------------+
| :attr:`%lld` | long long | Exactly equivalent to |
| | | ``printf("%lld")``. |
+-------------------+---------------------+--------------------------------+
+ | :attr:`%lli` | long long | Exactly equivalent to |
+ | | | ``printf("%lli")``. |
+ +-------------------+---------------------+--------------------------------+
| :attr:`%llu` | unsigned long long | Exactly equivalent to |
| | | ``printf("%llu")``. |
+-------------------+---------------------+--------------------------------+
| :attr:`%zd` | Py_ssize_t | Exactly equivalent to |
| | | ``printf("%zd")``. |
+-------------------+---------------------+--------------------------------+
+ | :attr:`%zi` | Py_ssize_t | Exactly equivalent to |
+ | | | ``printf("%zi")``. |
+ +-------------------+---------------------+--------------------------------+
| :attr:`%zu` | size_t | Exactly equivalent to |
| | | ``printf("%zu")``. |
+-------------------+---------------------+--------------------------------+
@@ -322,27 +527,178 @@ APIs:
.. versionchanged:: 3.2
Support for ``"%lld"`` and ``"%llu"`` added.
+ .. versionchanged:: 3.3
+ Support for ``"%li"``, ``"%lli"`` and ``"%zi"`` added.
+
.. c:function:: PyObject* PyUnicode_FromFormatV(const char *format, va_list vargs)
Identical to :c:func:`PyUnicode_FromFormat` except that it takes exactly two
arguments.
+
+.. c:function:: PyObject* PyUnicode_FromEncodedObject(PyObject *obj, \
+ const char *encoding, const char *errors)
+
+ Coerce an encoded object *obj* to a Unicode object and return a reference with
+ incremented refcount.
+
+ :class:`bytes`, :class:`bytearray` and other char buffer compatible objects
+ are decoded according to the given *encoding* and using the error handling
+ defined by *errors*. Both can be *NULL* to have the interface use the default
+ values (see the next section for details).
+
+ All other objects, including Unicode objects, cause a :exc:`TypeError` to be
+ set.
+
+ The API returns *NULL* if there was an error. The caller is responsible for
+ decref'ing the returned objects.
+
+
+.. c:function:: Py_ssize_t PyUnicode_GetLength(PyObject *unicode)
+
+ Return the length of the Unicode object, in code points.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: int PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, \
+ PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many)
+
+ Copy characters from one Unicode object into another. This function performs
+ character conversion when necessary and falls back to :c:func:`memcpy` if
+ possible. Returns ``-1`` and sets an exception on error, otherwise returns
+ ``0``.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: Py_ssize_t PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, \
+ Py_ssize_t length, Py_UCS4 fill_char)
+
+ Fill a string with a character: write *fill_char* into
+ ``unicode[start:start+length]``.
+
+ Fail if *fill_char* is bigger than the string maximum character, or if the
+ string has more than 1 reference.
+
+ Return the number of written characters, or return ``-1`` and raise an
+ exception on error.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: int PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, \
+ Py_UCS4 character)
+
+ Write a character to a string. The string must have been created through
+ :c:func:`PyUnicode_New`. Since Unicode strings are supposed to be immutable,
+ the string must not be shared, or have been hashed yet.
+
+ This function checks that *unicode* is a Unicode object, that the index is
+ not out of bounds, and that the object can be modified safely (i.e. that
+ its reference count is one), in contrast to the macro version
+ :c:func:`PyUnicode_WRITE_CHAR`.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: Py_UCS4 PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
+
+ Read a character from a string. This function checks that *unicode* is a
+ Unicode object and the index is not out of bounds, in contrast to the macro
+ version :c:func:`PyUnicode_READ_CHAR`.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: PyObject* PyUnicode_Substring(PyObject *str, Py_ssize_t start, \
+ Py_ssize_t end)
+
+ Return a substring of *str*, from character index *start* (included) to
+ character index *end* (excluded). Negative indices are not supported.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: Py_UCS4* PyUnicode_AsUCS4(PyObject *u, Py_UCS4 *buffer, \
+ Py_ssize_t buflen, int copy_null)
+
+ Copy the string *u* into a UCS4 buffer, including a null character, if
+ *copy_null* is set. Returns *NULL* and sets an exception on error (in
+ particular, a :exc:`ValueError` if *buflen* is smaller than the length of
+ *u*). *buffer* is returned on success.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: Py_UCS4* PyUnicode_AsUCS4Copy(PyObject *u)
+
+ Copy the string *u* into a new UCS4 buffer that is allocated using
+ :c:func:`PyMem_Malloc`. If this fails, *NULL* is returned with a
+ :exc:`MemoryError` set.
+
+ .. versionadded:: 3.3
+
+
+Deprecated Py_UNICODE APIs
+""""""""""""""""""""""""""
+
+.. deprecated-removed:: 3.3 4.0
+
+These API functions are deprecated with the implementation of :pep:`393`.
+Extension modules can continue using them, as they will not be removed in Python
+3.x, but need to be aware that their use can now cause performance and memory hits.
+
+
+.. c:function:: PyObject* PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
+
+ Create a Unicode object from the Py_UNICODE buffer *u* of the given size. *u*
+ may be *NULL* which causes the contents to be undefined. It is the user's
+ responsibility to fill in the needed data. The buffer is copied into the new
+ object.
+
+ If the buffer is not *NULL*, the return value might be a shared object.
+ Therefore, modification of the resulting Unicode object is only allowed when
+ *u* is *NULL*.
+
+ If the buffer is *NULL*, :c:func:`PyUnicode_READY` must be called once the
+ string content has been filled before using any of the access macros such as
+ :c:func:`PyUnicode_KIND`.
+
+ Please migrate to using :c:func:`PyUnicode_FromKindAndData` or
+ :c:func:`PyUnicode_New`.
+
+
+.. c:function:: Py_UNICODE* PyUnicode_AsUnicode(PyObject *unicode)
+
+ Return a read-only pointer to the Unicode object's internal
+ :c:type:`Py_UNICODE` buffer, or *NULL* on error. This will create the
+ :c:type:`Py_UNICODE*` representation of the object if it is not yet
+ available. Note that the resulting :c:type:`Py_UNICODE` string may contain
+ embedded null characters, which would cause the string to be truncated when
+ used in most C functions.
+
+ Please migrate to using :c:func:`PyUnicode_AsUCS4`,
+ :c:func:`PyUnicode_Substring`, :c:func:`PyUnicode_ReadChar` or similar new
+ APIs.
+
+
.. c:function:: PyObject* PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, Py_ssize_t size)
Create a Unicode object by replacing all decimal digits in
:c:type:`Py_UNICODE` buffer of the given *size* by ASCII digits 0--9
- according to their decimal value. Return *NULL* if an exception
- occurs.
+ according to their decimal value. Return *NULL* if an exception occurs.
-.. c:function:: Py_UNICODE* PyUnicode_AsUnicode(PyObject *unicode)
+.. c:function:: Py_UNICODE* PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
- Return a read-only pointer to the Unicode object's internal
- :c:type:`Py_UNICODE` buffer, *NULL* if *unicode* is not a Unicode object.
- Note that the resulting :c:type:`Py_UNICODE*` string may contain embedded
- null characters, which would cause the string to be truncated when used in
- most C functions.
+ Like :c:func:`PyUnicode_AsUnicode`, but also saves the :c:type:`Py_UNICODE`
+ array length in *size*. Note that the resulting :c:type:`Py_UNICODE*` string
+ may contain embedded null characters, which would cause the string to be
+ truncated when used in most C functions.
+
+ .. versionadded:: 3.3
.. c:function:: Py_UNICODE* PyUnicode_AsUnicodeCopy(PyObject *unicode)
@@ -350,44 +706,76 @@ APIs:
Create a copy of a Unicode string ending with a nul character. Return *NULL*
and raise a :exc:`MemoryError` exception on memory allocation failure,
otherwise return a new allocated buffer (use :c:func:`PyMem_Free` to free
- the buffer). Note that the resulting :c:type:`Py_UNICODE*` string may contain
- embedded null characters, which would cause the string to be truncated when
- used in most C functions.
+ the buffer). Note that the resulting :c:type:`Py_UNICODE*` string may
+ contain embedded null characters, which would cause the string to be
+ truncated when used in most C functions.
.. versionadded:: 3.2
+ Please migrate to using :c:func:`PyUnicode_AsUCS4Copy` or similar new APIs.
+
.. c:function:: Py_ssize_t PyUnicode_GetSize(PyObject *unicode)
- Return the length of the Unicode object.
+ Return the size of the deprecated :c:type:`Py_UNICODE` representation, in
+ code units (this includes surrogate pairs as 2 units).
+ Please migrate to using :c:func:`PyUnicode_GetLength`.
-.. c:function:: PyObject* PyUnicode_FromEncodedObject(PyObject *obj, const char *encoding, const char *errors)
- Coerce an encoded object *obj* to an Unicode object and return a reference with
- incremented refcount.
+.. c:function:: PyObject* PyUnicode_FromObject(PyObject *obj)
- :class:`bytes`, :class:`bytearray` and other char buffer compatible objects
- are decoded according to the given *encoding* and using the error handling
- defined by *errors*. Both can be *NULL* to have the interface use the default
- values (see the next section for details).
+ Shortcut for ``PyUnicode_FromEncodedObject(obj, NULL, "strict")`` which is used
+ throughout the interpreter whenever coercion to Unicode is needed.
- All other objects, including Unicode objects, cause a :exc:`TypeError` to be
- set.
- The API returns *NULL* if there was an error. The caller is responsible for
- decref'ing the returned objects.
+Locale Encoding
+"""""""""""""""
+The current locale encoding can be used to decode text from the operating
+system.
-.. c:function:: PyObject* PyUnicode_FromObject(PyObject *obj)
+.. c:function:: PyObject* PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, int surrogateescape)
+
+ Decode a string from the current locale encoding. The decoder is strict if
+ *surrogateescape* is equal to zero, otherwise it uses the
+ ``'surrogateescape'`` error handler (:pep:`383`) to escape undecodable
+ bytes. If a byte sequence can be decoded as a surrogate character and
+ *surrogateescape* is not equal to zero, the byte sequence is escaped using
+ the ``'surrogateescape'`` error handler instead of being decoded. *str*
+ must end with a null character but cannot contain embedded null characters.
+
+ .. seealso::
+
+ Use :c:func:`PyUnicode_DecodeFSDefaultAndSize` to decode a string from
+ :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
+ Python startup).
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: PyObject* PyUnicode_DecodeLocale(const char *str, int surrogateescape)
+
+ Similar to :c:func:`PyUnicode_DecodeLocaleAndSize`, but compute the string
+ length using :c:func:`strlen`.
+
+ .. versionadded:: 3.3
- Shortcut for ``PyUnicode_FromEncodedObject(obj, NULL, "strict")`` which is used
- throughout the interpreter whenever coercion to Unicode is needed.
-If the platform supports :c:type:`wchar_t` and provides a header file wchar.h,
-Python can interface directly to this type using the following functions.
-Support is optimized if Python's own :c:type:`Py_UNICODE` type is identical to
-the system's :c:type:`wchar_t`.
+.. c:function:: PyObject* PyUnicode_EncodeLocale(PyObject *unicode, int surrogateescape)
+
+ Encode a Unicode object to the current locale encoding. The encoder is
+ strict if *surrogateescape* is equal to zero, otherwise it uses the
+ ``'surrogateescape'`` error handler (:pep:`383`). Return a :class:`bytes`
+ object. *unicode* cannot contain embedded null characters.
+
+ .. seealso::
+
+ Use :c:func:`PyUnicode_EncodeFSDefault` to encode a string to
+ :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
+ Python startup).
+
+ .. versionadded:: 3.3
File System Encoding
@@ -430,6 +818,13 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
locale encoding.
+ .. seealso::
+
+ :c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
+ locale encoding and cannot be modified later. If you need to decode a
+ string from the current locale encoding, use
+ :c:func:`PyUnicode_DecodeLocaleAndSize`.
+
.. versionchanged:: 3.2
Use ``'strict'`` error handler on Windows.
@@ -458,6 +853,13 @@ used, passing :c:func:`PyUnicode_FSDecoder` as the conversion function:
If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
locale encoding.
+ .. seealso::
+
+ :c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
+ locale encoding and cannot be modified later. If you need to encode a
+ string to the current locale encoding, use
+ :c:func:`PyUnicode_EncodeLocale`.
+
.. versionadded:: 3.2
@@ -479,9 +881,9 @@ wchar_t Support
Copy the Unicode object contents into the :c:type:`wchar_t` buffer *w*. At most
*size* :c:type:`wchar_t` characters are copied (excluding a possibly trailing
0-termination character). Return the number of :c:type:`wchar_t` characters
- copied or -1 in case of an error. Note that the resulting :c:type:`wchar_t`
+ copied or -1 in case of an error. Note that the resulting :c:type:`wchar_t*`
string may or may not be 0-terminated. It is the responsibility of the caller
- to make sure that the :c:type:`wchar_t` string is 0-terminated in case this is
+ to make sure that the :c:type:`wchar_t*` string is 0-terminated in case this is
required by the application. Also, note that the :c:type:`wchar_t*` string
might contain null characters, which would cause the string to be truncated
when used with most C functions.
@@ -497,12 +899,32 @@ wchar_t Support
Returns a buffer allocated by :c:func:`PyMem_Alloc` (use
 :c:func:`PyMem_Free` to free it) on success. On error, returns *NULL* and
 raises a :exc:`MemoryError`; *\*size* is undefined. Note that the
- resulting :c:type:`wchar_t*` string might contain null characters, which
+ resulting :c:type:`wchar_t` string might contain null characters, which
would cause the string to be truncated when used with most C functions.
.. versionadded:: 3.2
+UCS4 Support
+""""""""""""
+
+.. versionadded:: 3.3
+
+.. XXX are these meant to be public?
+
+.. c:function:: size_t Py_UCS4_strlen(const Py_UCS4 *u)
+ Py_UCS4* Py_UCS4_strcpy(Py_UCS4 *s1, const Py_UCS4 *s2)
+ Py_UCS4* Py_UCS4_strncpy(Py_UCS4 *s1, const Py_UCS4 *s2, size_t n)
+ Py_UCS4* Py_UCS4_strcat(Py_UCS4 *s1, const Py_UCS4 *s2)
+ int Py_UCS4_strcmp(const Py_UCS4 *s1, const Py_UCS4 *s2)
+ int Py_UCS4_strncmp(const Py_UCS4 *s1, const Py_UCS4 *s2, size_t n)
+ Py_UCS4* Py_UCS4_strchr(const Py_UCS4 *s, Py_UCS4 c)
+ Py_UCS4* Py_UCS4_strrchr(const Py_UCS4 *s, Py_UCS4 c)
+
+ These utility functions work on strings of :c:type:`Py_UCS4` characters and
+ otherwise behave like the C standard library functions with the same name.
+
+
.. _builtincodecs:
Built-in Codecs
@@ -537,7 +959,8 @@ Generic Codecs
These are the generic codec APIs:
-.. c:function:: PyObject* PyUnicode_Decode(const char *s, Py_ssize_t size, const char *encoding, const char *errors)
+.. c:function:: PyObject* PyUnicode_Decode(const char *s, Py_ssize_t size, \
+ const char *encoding, const char *errors)
Create a Unicode object by decoding *size* bytes of the encoded string *s*.
*encoding* and *errors* have the same meaning as the parameters of the same name
@@ -546,7 +969,18 @@ These are the generic codec APIs:
the codec.
-.. c:function:: PyObject* PyUnicode_Encode(const Py_UNICODE *s, Py_ssize_t size, const char *encoding, const char *errors)
+.. c:function:: PyObject* PyUnicode_AsEncodedString(PyObject *unicode, \
+ const char *encoding, const char *errors)
+
+ Encode a Unicode object and return the result as Python bytes object.
+ *encoding* and *errors* have the same meaning as the parameters of the same
+ name in the Unicode :meth:`encode` method. The codec to be used is looked up
+ using the Python codec registry. Return *NULL* if an exception was raised by
+ the codec.
+
+
+.. c:function:: PyObject* PyUnicode_Encode(const Py_UNICODE *s, Py_ssize_t size, \
+ const char *encoding, const char *errors)
Encode the :c:type:`Py_UNICODE` buffer *s* of the given *size* and return a Python
bytes object. *encoding* and *errors* have the same meaning as the
@@ -554,14 +988,9 @@ These are the generic codec APIs:
to be used is looked up using the Python codec registry. Return *NULL* if an
exception was raised by the codec.
-
-.. c:function:: PyObject* PyUnicode_AsEncodedString(PyObject *unicode, const char *encoding, const char *errors)
-
- Encode a Unicode object and return the result as Python bytes object.
- *encoding* and *errors* have the same meaning as the parameters of the same
- name in the Unicode :meth:`encode` method. The codec to be used is looked up
- using the Python codec registry. Return *NULL* if an exception was raised by
- the codec.
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
+ :c:func:`PyUnicode_AsEncodedString`.
UTF-8 Codecs
@@ -576,7 +1005,8 @@ These are the UTF-8 codec APIs:
*s*. Return *NULL* if an exception was raised by the codec.
-.. c:function:: PyObject* PyUnicode_DecodeUTF8Stateful(const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed)
+.. c:function:: PyObject* PyUnicode_DecodeUTF8Stateful(const char *s, Py_ssize_t size, \
+ const char *errors, Py_ssize_t *consumed)
If *consumed* is *NULL*, behave like :c:func:`PyUnicode_DecodeUTF8`. If
*consumed* is not *NULL*, trailing incomplete UTF-8 byte sequences will not be
@@ -584,18 +1014,45 @@ These are the UTF-8 codec APIs:
that have been decoded will be stored in *consumed*.
+.. c:function:: PyObject* PyUnicode_AsUTF8String(PyObject *unicode)
+
+ Encode a Unicode object using UTF-8 and return the result as Python bytes
+ object. Error handling is "strict". Return *NULL* if an exception was
+ raised by the codec.
+
+
+.. c:function:: char* PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *size)
+
+ Return a pointer to the default encoding (UTF-8) of the Unicode object, and
+ store the size of the encoded representation (in bytes) in *size*. *size*
+ can be *NULL*, in this case no size will be stored.
+
+ In the case of an error, *NULL* is returned with an exception set and no
+ *size* is stored.
+
+ This caches the UTF-8 representation of the string in the Unicode object, and
+ subsequent calls will return a pointer to the same buffer. The caller is not
+ responsible for deallocating the buffer.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: char* PyUnicode_AsUTF8(PyObject *unicode)
+
+ As :c:func:`PyUnicode_AsUTF8AndSize`, but does not store the size.
+
+ .. versionadded:: 3.3
+
+
.. c:function:: PyObject* PyUnicode_EncodeUTF8(const Py_UNICODE *s, Py_ssize_t size, const char *errors)
Encode the :c:type:`Py_UNICODE` buffer *s* of the given *size* using UTF-8 and
return a Python bytes object. Return *NULL* if an exception was raised by
the codec.
-
-.. c:function:: PyObject* PyUnicode_AsUTF8String(PyObject *unicode)
-
- Encode a Unicode object using UTF-8 and return the result as Python bytes
- object. Error handling is "strict". Return *NULL* if an exception was
- raised by the codec.
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
+ :c:func:`PyUnicode_AsUTF8String` or :c:func:`PyUnicode_AsUTF8AndSize`.
UTF-32 Codecs
@@ -604,7 +1061,8 @@ UTF-32 Codecs
These are the UTF-32 codec APIs:
-.. c:function:: PyObject* PyUnicode_DecodeUTF32(const char *s, Py_ssize_t size, const char *errors, int *byteorder)
+.. c:function:: PyObject* PyUnicode_DecodeUTF32(const char *s, Py_ssize_t size, \
+ const char *errors, int *byteorder)
Decode *size* bytes from a UTF-32 encoded buffer string and return the
corresponding Unicode object. *errors* (if non-*NULL*) defines the error
@@ -625,14 +1083,13 @@ These are the UTF-32 codec APIs:
After completion, *\*byteorder* is set to the current byte order at the end
of input data.
- In a narrow build codepoints outside the BMP will be decoded as surrogate pairs.
-
If *byteorder* is *NULL*, the codec starts in native order mode.
Return *NULL* if an exception was raised by the codec.
-.. c:function:: PyObject* PyUnicode_DecodeUTF32Stateful(const char *s, Py_ssize_t size, const char *errors, int *byteorder, Py_ssize_t *consumed)
+.. c:function:: PyObject* PyUnicode_DecodeUTF32Stateful(const char *s, Py_ssize_t size, \
+ const char *errors, int *byteorder, Py_ssize_t *consumed)
If *consumed* is *NULL*, behave like :c:func:`PyUnicode_DecodeUTF32`. If
*consumed* is not *NULL*, :c:func:`PyUnicode_DecodeUTF32Stateful` will not treat
@@ -641,7 +1098,15 @@ These are the UTF-32 codec APIs:
that have been decoded will be stored in *consumed*.
-.. c:function:: PyObject* PyUnicode_EncodeUTF32(const Py_UNICODE *s, Py_ssize_t size, const char *errors, int byteorder)
+.. c:function:: PyObject* PyUnicode_AsUTF32String(PyObject *unicode)
+
+ Return a Python byte string using the UTF-32 encoding in native byte
+ order. The string always starts with a BOM mark. Error handling is "strict".
+ Return *NULL* if an exception was raised by the codec.
+
+
+.. c:function:: PyObject* PyUnicode_EncodeUTF32(const Py_UNICODE *s, Py_ssize_t size, \
+ const char *errors, int byteorder)
Return a Python bytes object holding the UTF-32 encoded value of the Unicode
data in *s*. Output is written according to the following byte order::
@@ -658,12 +1123,9 @@ These are the UTF-32 codec APIs:
Return *NULL* if an exception was raised by the codec.
-
-.. c:function:: PyObject* PyUnicode_AsUTF32String(PyObject *unicode)
-
- Return a Python byte string using the UTF-32 encoding in native byte
- order. The string always starts with a BOM mark. Error handling is "strict".
- Return *NULL* if an exception was raised by the codec.
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
+ :c:func:`PyUnicode_AsUTF32String`.
UTF-16 Codecs
@@ -672,7 +1134,8 @@ UTF-16 Codecs
These are the UTF-16 codec APIs:
-.. c:function:: PyObject* PyUnicode_DecodeUTF16(const char *s, Py_ssize_t size, const char *errors, int *byteorder)
+.. c:function:: PyObject* PyUnicode_DecodeUTF16(const char *s, Py_ssize_t size, \
+ const char *errors, int *byteorder)
Decode *size* bytes from a UTF-16 encoded buffer string and return the
corresponding Unicode object. *errors* (if non-*NULL*) defines the error
@@ -699,7 +1162,8 @@ These are the UTF-16 codec APIs:
Return *NULL* if an exception was raised by the codec.
-.. c:function:: PyObject* PyUnicode_DecodeUTF16Stateful(const char *s, Py_ssize_t size, const char *errors, int *byteorder, Py_ssize_t *consumed)
+.. c:function:: PyObject* PyUnicode_DecodeUTF16Stateful(const char *s, Py_ssize_t size, \
+ const char *errors, int *byteorder, Py_ssize_t *consumed)
If *consumed* is *NULL*, behave like :c:func:`PyUnicode_DecodeUTF16`. If
*consumed* is not *NULL*, :c:func:`PyUnicode_DecodeUTF16Stateful` will not treat
@@ -708,7 +1172,15 @@ These are the UTF-16 codec APIs:
number of bytes that have been decoded will be stored in *consumed*.
-.. c:function:: PyObject* PyUnicode_EncodeUTF16(const Py_UNICODE *s, Py_ssize_t size, const char *errors, int byteorder)
+.. c:function:: PyObject* PyUnicode_AsUTF16String(PyObject *unicode)
+
+ Return a Python byte string using the UTF-16 encoding in native byte
+ order. The string always starts with a BOM mark. Error handling is "strict".
+ Return *NULL* if an exception was raised by the codec.
+
+
+.. c:function:: PyObject* PyUnicode_EncodeUTF16(const Py_UNICODE *s, Py_ssize_t size, \
+ const char *errors, int byteorder)
Return a Python bytes object holding the UTF-16 encoded value of the Unicode
data in *s*. Output is written according to the following byte order::
@@ -726,12 +1198,9 @@ These are the UTF-16 codec APIs:
Return *NULL* if an exception was raised by the codec.
-
-.. c:function:: PyObject* PyUnicode_AsUTF16String(PyObject *unicode)
-
- Return a Python byte string using the UTF-16 encoding in native byte
- order. The string always starts with a BOM mark. Error handling is "strict".
- Return *NULL* if an exception was raised by the codec.
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
+ :c:func:`PyUnicode_AsUTF16String`.
UTF-7 Codecs
@@ -746,7 +1215,8 @@ These are the UTF-7 codec APIs:
*s*. Return *NULL* if an exception was raised by the codec.
-.. c:function:: PyObject* PyUnicode_DecodeUTF7Stateful(const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed)
+.. c:function:: PyObject* PyUnicode_DecodeUTF7Stateful(const char *s, Py_ssize_t size, \
+ const char *errors, Py_ssize_t *consumed)
If *consumed* is *NULL*, behave like :c:func:`PyUnicode_DecodeUTF7`. If
*consumed* is not *NULL*, trailing incomplete UTF-7 base-64 sections will not
@@ -754,7 +1224,8 @@ These are the UTF-7 codec APIs:
bytes that have been decoded will be stored in *consumed*.
-.. c:function:: PyObject* PyUnicode_EncodeUTF7(const Py_UNICODE *s, Py_ssize_t size, int base64SetO, int base64WhiteSpace, const char *errors)
+.. c:function:: PyObject* PyUnicode_EncodeUTF7(const Py_UNICODE *s, Py_ssize_t size, \
+ int base64SetO, int base64WhiteSpace, const char *errors)
Encode the :c:type:`Py_UNICODE` buffer of the given size using UTF-7 and
return a Python bytes object. Return *NULL* if an exception was raised by
@@ -765,6 +1236,11 @@ These are the UTF-7 codec APIs:
nonzero, whitespace will be encoded in base-64. Both are set to zero for the
Python "utf-7" codec.
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API.
+
+ .. XXX replace with what?
+
Unicode-Escape Codecs
"""""""""""""""""""""
@@ -772,24 +1248,29 @@ Unicode-Escape Codecs
These are the "Unicode Escape" codec APIs:
-.. c:function:: PyObject* PyUnicode_DecodeUnicodeEscape(const char *s, Py_ssize_t size, const char *errors)
+.. c:function:: PyObject* PyUnicode_DecodeUnicodeEscape(const char *s, \
+ Py_ssize_t size, const char *errors)
Create a Unicode object by decoding *size* bytes of the Unicode-Escape encoded
string *s*. Return *NULL* if an exception was raised by the codec.
+.. c:function:: PyObject* PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
+
+ Encode a Unicode object using Unicode-Escape and return the result as Python
+ string object. Error handling is "strict". Return *NULL* if an exception was
+ raised by the codec.
+
+
.. c:function:: PyObject* PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, Py_ssize_t size)
Encode the :c:type:`Py_UNICODE` buffer of the given *size* using Unicode-Escape and
return a Python string object. Return *NULL* if an exception was raised by the
codec.
-
-.. c:function:: PyObject* PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
-
- Encode a Unicode object using Unicode-Escape and return the result as Python
- string object. Error handling is "strict". Return *NULL* if an exception was
- raised by the codec.
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
+ :c:func:`PyUnicode_AsUnicodeEscapeString`.
Raw-Unicode-Escape Codecs
@@ -798,19 +1279,13 @@ Raw-Unicode-Escape Codecs
These are the "Raw Unicode Escape" codec APIs:
-.. c:function:: PyObject* PyUnicode_DecodeRawUnicodeEscape(const char *s, Py_ssize_t size, const char *errors)
+.. c:function:: PyObject* PyUnicode_DecodeRawUnicodeEscape(const char *s, \
+ Py_ssize_t size, const char *errors)
Create a Unicode object by decoding *size* bytes of the Raw-Unicode-Escape
encoded string *s*. Return *NULL* if an exception was raised by the codec.
-.. c:function:: PyObject* PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, Py_ssize_t size, const char *errors)
-
- Encode the :c:type:`Py_UNICODE` buffer of the given *size* using Raw-Unicode-Escape
- and return a Python string object. Return *NULL* if an exception was raised by
- the codec.
-
-
.. c:function:: PyObject* PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
Encode a Unicode object using Raw-Unicode-Escape and return the result as
@@ -818,6 +1293,18 @@ These are the "Raw Unicode Escape" codec APIs:
was raised by the codec.
+.. c:function:: PyObject* PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, \
+ Py_ssize_t size, const char *errors)
+
+ Encode the :c:type:`Py_UNICODE` buffer of the given *size* using Raw-Unicode-Escape
+ and return a Python string object. Return *NULL* if an exception was raised by
+ the codec.
+
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
+ :c:func:`PyUnicode_AsRawUnicodeEscapeString`.
+
+
Latin-1 Codecs
""""""""""""""
@@ -831,18 +1318,22 @@ ordinals and only these are accepted by the codecs during encoding.
*s*. Return *NULL* if an exception was raised by the codec.
+.. c:function:: PyObject* PyUnicode_AsLatin1String(PyObject *unicode)
+
+ Encode a Unicode object using Latin-1 and return the result as Python bytes
+ object. Error handling is "strict". Return *NULL* if an exception was
+ raised by the codec.
+
+
.. c:function:: PyObject* PyUnicode_EncodeLatin1(const Py_UNICODE *s, Py_ssize_t size, const char *errors)
Encode the :c:type:`Py_UNICODE` buffer of the given *size* using Latin-1 and
return a Python bytes object. Return *NULL* if an exception was raised by
the codec.
-
-.. c:function:: PyObject* PyUnicode_AsLatin1String(PyObject *unicode)
-
- Encode a Unicode object using Latin-1 and return the result as Python bytes
- object. Error handling is "strict". Return *NULL* if an exception was
- raised by the codec.
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
+ :c:func:`PyUnicode_AsLatin1String`.
ASCII Codecs
@@ -858,18 +1349,22 @@ codes generate errors.
*s*. Return *NULL* if an exception was raised by the codec.
+.. c:function:: PyObject* PyUnicode_AsASCIIString(PyObject *unicode)
+
+ Encode a Unicode object using ASCII and return the result as Python bytes
+ object. Error handling is "strict". Return *NULL* if an exception was
+ raised by the codec.
+
+
.. c:function:: PyObject* PyUnicode_EncodeASCII(const Py_UNICODE *s, Py_ssize_t size, const char *errors)
Encode the :c:type:`Py_UNICODE` buffer of the given *size* using ASCII and
return a Python bytes object. Return *NULL* if an exception was raised by
the codec.
-
-.. c:function:: PyObject* PyUnicode_AsASCIIString(PyObject *unicode)
-
- Encode a Unicode object using ASCII and return the result as Python bytes
- object. Error handling is "strict". Return *NULL* if an exception was
- raised by the codec.
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
+ :c:func:`PyUnicode_AsASCIIString`.
Character Map Codecs
@@ -898,7 +1393,8 @@ characters to different code points.
These are the mapping codec APIs:
-.. c:function:: PyObject* PyUnicode_DecodeCharmap(const char *s, Py_ssize_t size, PyObject *mapping, const char *errors)
+.. c:function:: PyObject* PyUnicode_DecodeCharmap(const char *s, Py_ssize_t size, \
+ PyObject *mapping, const char *errors)
Create a Unicode object by decoding *size* bytes of the encoded string *s* using
the given *mapping* object. Return *NULL* if an exception was raised by the
@@ -908,13 +1404,6 @@ These are the mapping codec APIs:
treated as "undefined mapping".
-.. c:function:: PyObject* PyUnicode_EncodeCharmap(const Py_UNICODE *s, Py_ssize_t size, PyObject *mapping, const char *errors)
-
- Encode the :c:type:`Py_UNICODE` buffer of the given *size* using the given
- *mapping* object and return a Python string object. Return *NULL* if an
- exception was raised by the codec.
-
-
.. c:function:: PyObject* PyUnicode_AsCharmapString(PyObject *unicode, PyObject *mapping)
Encode a Unicode object using the given *mapping* object and return the result
@@ -924,7 +1413,8 @@ These are the mapping codec APIs:
 The following codec API is special in that it maps Unicode to Unicode.
-.. c:function:: PyObject* PyUnicode_TranslateCharmap(const Py_UNICODE *s, Py_ssize_t size, PyObject *table, const char *errors)
+.. c:function:: PyObject* PyUnicode_TranslateCharmap(const Py_UNICODE *s, Py_ssize_t size, \
+ PyObject *table, const char *errors)
Translate a :c:type:`Py_UNICODE` buffer of the given *size* by applying a
character mapping *table* to it and return the resulting Unicode object. Return
@@ -937,6 +1427,22 @@ The following codec API is special in that maps Unicode to Unicode.
and sequences work well. Unmapped character ordinals (ones which cause a
:exc:`LookupError`) are left untouched and are copied as-is.
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API.
+
+ .. XXX replace with what?
+
+
+.. c:function:: PyObject* PyUnicode_EncodeCharmap(const Py_UNICODE *s, Py_ssize_t size, \
+ PyObject *mapping, const char *errors)
+
+ Encode the :c:type:`Py_UNICODE` buffer of the given *size* using the given
+ *mapping* object and return a Python string object. Return *NULL* if an
+ exception was raised by the codec.
+
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
+ :c:func:`PyUnicode_AsCharmapString`.
MBCS codecs for Windows
@@ -953,7 +1459,8 @@ the user settings on the machine running the codec.
Return *NULL* if an exception was raised by the codec.
-.. c:function:: PyObject* PyUnicode_DecodeMBCSStateful(const char *s, int size, const char *errors, int *consumed)
+.. c:function:: PyObject* PyUnicode_DecodeMBCSStateful(const char *s, int size, \
+ const char *errors, int *consumed)
If *consumed* is *NULL*, behave like :c:func:`PyUnicode_DecodeMBCS`. If
*consumed* is not *NULL*, :c:func:`PyUnicode_DecodeMBCSStateful` will not decode
@@ -961,18 +1468,31 @@ the user settings on the machine running the codec.
in *consumed*.
+.. c:function:: PyObject* PyUnicode_AsMBCSString(PyObject *unicode)
+
+ Encode a Unicode object using MBCS and return the result as Python bytes
+ object. Error handling is "strict". Return *NULL* if an exception was
+ raised by the codec.
+
+
+.. c:function:: PyObject* PyUnicode_EncodeCodePage(int code_page, PyObject *unicode, const char *errors)
+
+ Encode the Unicode object using the specified code page and return a Python
+ bytes object. Return *NULL* if an exception was raised by the codec. Use
+ :c:data:`CP_ACP` code page to get the MBCS encoder.
+
+ .. versionadded:: 3.3
+
+
.. c:function:: PyObject* PyUnicode_EncodeMBCS(const Py_UNICODE *s, Py_ssize_t size, const char *errors)
Encode the :c:type:`Py_UNICODE` buffer of the given *size* using MBCS and return
a Python bytes object. Return *NULL* if an exception was raised by the
codec.
-
-.. c:function:: PyObject* PyUnicode_AsMBCSString(PyObject *unicode)
-
- Encode a Unicode object using MBCS and return the result as Python bytes
- object. Error handling is "strict". Return *NULL* if an exception was
- raised by the codec.
+ .. deprecated-removed:: 3.3 4.0
+ Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
+ :c:func:`PyUnicode_AsMBCSString` or :c:func:`PyUnicode_EncodeCodePage`.
Methods & Slots
@@ -1011,7 +1531,8 @@ They all return *NULL* or ``-1`` if an exception occurs.
characters are not included in the resulting strings.
-.. c:function:: PyObject* PyUnicode_Translate(PyObject *str, PyObject *table, const char *errors)
+.. c:function:: PyObject* PyUnicode_Translate(PyObject *str, PyObject *table, \
+ const char *errors)
Translate a string by applying a character mapping table to it and return the
resulting Unicode object.
@@ -1033,14 +1554,16 @@ They all return *NULL* or ``-1`` if an exception occurs.
Unicode string.
-.. c:function:: int PyUnicode_Tailmatch(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end, int direction)
+.. c:function:: int PyUnicode_Tailmatch(PyObject *str, PyObject *substr, \
+ Py_ssize_t start, Py_ssize_t end, int direction)
Return 1 if *substr* matches ``str[start:end]`` at the given tail end
(*direction* == -1 means to do a prefix match, *direction* == 1 a suffix match),
0 otherwise. Return ``-1`` if an error occurred.
-.. c:function:: Py_ssize_t PyUnicode_Find(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end, int direction)
+.. c:function:: Py_ssize_t PyUnicode_Find(PyObject *str, PyObject *substr, \
+ Py_ssize_t start, Py_ssize_t end, int direction)
Return the first position of *substr* in ``str[start:end]`` using the given
*direction* (*direction* == 1 means to do a forward search, *direction* == -1 a
@@ -1049,13 +1572,27 @@ They all return *NULL* or ``-1`` if an exception occurs.
occurred and an exception has been set.
-.. c:function:: Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end)
+.. c:function:: Py_ssize_t PyUnicode_FindChar(PyObject *str, Py_UCS4 ch, \
+ Py_ssize_t start, Py_ssize_t end, int direction)
+
+ Return the first position of the character *ch* in ``str[start:end]`` using
+ the given *direction* (*direction* == 1 means to do a forward search,
+ *direction* == -1 a backward search). The return value is the index of the
+ first match; a value of ``-1`` indicates that no match was found, and ``-2``
+ indicates that an error occurred and an exception has been set.
+
+ .. versionadded:: 3.3
+
+
+.. c:function:: Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, \
+ Py_ssize_t start, Py_ssize_t end)
Return the number of non-overlapping occurrences of *substr* in
``str[start:end]``. Return ``-1`` if an error occurred.
-.. c:function:: PyObject* PyUnicode_Replace(PyObject *str, PyObject *substr, PyObject *replstr, Py_ssize_t maxcount)
+.. c:function:: PyObject* PyUnicode_Replace(PyObject *str, PyObject *substr, \
+ PyObject *replstr, Py_ssize_t maxcount)
Replace at most *maxcount* occurrences of *substr* in *str* with *replstr* and
return the resulting Unicode object. *maxcount* == -1 means replace all
@@ -1076,7 +1613,7 @@ They all return *NULL* or ``-1`` if an exception occurs.
ISO-8859-1 if it contains non-ASCII characters".
-.. c:function:: int PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
+.. c:function:: PyObject* PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
Rich compare two unicode strings and return one of the following:
@@ -1103,8 +1640,8 @@ They all return *NULL* or ``-1`` if an exception occurs.
Check whether *element* is contained in *container* and return true or false
accordingly.
- *element* has to coerce to a one element Unicode string. ``-1`` is returned if
- there was an error.
+ *element* has to coerce to a one element Unicode string. ``-1`` is returned
+ if there was an error.
.. c:function:: void PyUnicode_InternInPlace(PyObject **string)
@@ -1123,7 +1660,6 @@ They all return *NULL* or ``-1`` if an exception occurs.
.. c:function:: PyObject* PyUnicode_InternFromString(const char *v)
A combination of :c:func:`PyUnicode_FromString` and
- :c:func:`PyUnicode_InternInPlace`, returning either a new unicode string object
- that has been interned, or a new ("owned") reference to an earlier interned
- string object with the same value.
-
+ :c:func:`PyUnicode_InternInPlace`, returning either a new unicode string
+ object that has been interned, or a new ("owned") reference to an earlier
+ interned string object with the same value.
diff --git a/Doc/c-api/veryhigh.rst b/Doc/c-api/veryhigh.rst
index 41cdd6b..499eb3e 100644
--- a/Doc/c-api/veryhigh.rst
+++ b/Doc/c-api/veryhigh.rst
@@ -95,12 +95,6 @@ the same library that the Python runtime is using.
leaving *closeit* set to ``0`` and *flags* set to *NULL*.
-.. c:function:: int PyRun_SimpleFileFlags(FILE *fp, const char *filename, PyCompilerFlags *flags)
-
- This is a simplified interface to :c:func:`PyRun_SimpleFileExFlags` below,
- leaving *closeit* set to ``0``.
-
-
.. c:function:: int PyRun_SimpleFileEx(FILE *fp, const char *filename, int closeit)
This is a simplified interface to :c:func:`PyRun_SimpleFileExFlags` below,
diff --git a/Doc/conf.py b/Doc/conf.py
index 555f281..6b085e0 100644
--- a/Doc/conf.py
+++ b/Doc/conf.py
@@ -91,7 +91,7 @@ html_additional_pages = {
}
# Output an OpenSearch description file.
-html_use_opensearch = 'http://docs.python.org/3.2'
+html_use_opensearch = 'http://docs.python.org/' + version
# Additional static files.
html_static_path = ['tools/sphinxext/static']
diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat
index c7d7bd1..a1004ad 100644
--- a/Doc/data/refcounts.dat
+++ b/Doc/data/refcounts.dat
@@ -465,6 +465,11 @@ PyFunction_New:PyObject*::+1:
PyFunction_New:PyObject*:code:+1:
PyFunction_New:PyObject*:globals:+1:
+PyFunction_NewWithQualName:PyObject*::+1:
+PyFunction_NewWithQualName:PyObject*:code:+1:
+PyFunction_NewWithQualName:PyObject*:globals:+1:
+PyFunction_NewWithQualName:PyObject*:qualname:+1:
+
PyFunction_SetClosure:int:::
PyFunction_SetClosure:PyObject*:op:0:
PyFunction_SetClosure:PyObject*:closure:+1:
diff --git a/Doc/distutils/apiref.rst b/Doc/distutils/apiref.rst
index e15dc76..71702e5 100644
--- a/Doc/distutils/apiref.rst
+++ b/Doc/distutils/apiref.rst
@@ -160,7 +160,7 @@ the full reference.
.. class:: Extension
The Extension class describes a single C or C++ extension module in a setup
- script. It accepts the following keyword arguments in its constructor
+ script. It accepts the following keyword arguments in its constructor:
+------------------------+--------------------------------+---------------------------+
| argument name | value | type |
@@ -1157,12 +1157,11 @@ other utility module.
.. function:: grok_environment_error(exc[, prefix='error: '])
- Generate a useful error message from an :exc:`EnvironmentError` (:exc:`IOError`
- or :exc:`OSError`) exception object. Handles Python 1.5.1 and later styles,
- and does what it can to deal with exception objects that don't have a filename
- (which happens when the error is due to a two-file operation, such as
- :func:`rename` or :func:`link`). Returns the error message as a string
- prefixed with *prefix*.
+ Generate a useful error message from an :exc:`OSError` exception object.
+ Handles Python 1.5.1 and later styles, and does what it can to deal with
+ exception objects that don't have a filename (which happens when the error
+ is due to a two-file operation, such as :func:`rename` or :func:`link`).
+ Returns the error message as a string prefixed with *prefix*.
.. function:: split_quoted(s)
diff --git a/Doc/extending/extending.rst b/Doc/extending/extending.rst
index 7fd9f72..ef95ac9 100644
--- a/Doc/extending/extending.rst
+++ b/Doc/extending/extending.rst
@@ -321,7 +321,7 @@ parameters to be passed in as a tuple acceptable for parsing via
The :const:`METH_KEYWORDS` bit may be set in the third field if keyword
arguments should be passed to the function. In this case, the C function should
-accept a third ``PyObject \*`` parameter which will be a dictionary of keywords.
+accept a third ``PyObject *`` parameter which will be a dictionary of keywords.
Use :c:func:`PyArg_ParseTupleAndKeywords` to parse the arguments to such a
function.
@@ -384,9 +384,6 @@ optionally followed by an import of the module::
imports it. */
PyImport_ImportModule("spam");
-An example may be found in the file :file:`Demo/embed/demo.c` in the Python
-source distribution.
-
.. note::
Removing entries from ``sys.modules`` or importing compiled modules into
diff --git a/Doc/extending/newtypes.rst b/Doc/extending/newtypes.rst
index 3001415..376d09a 100644
--- a/Doc/extending/newtypes.rst
+++ b/Doc/extending/newtypes.rst
@@ -26,11 +26,12 @@ The Basics
==========
The Python runtime sees all Python objects as variables of type
-:c:type:`PyObject\*`. A :c:type:`PyObject` is not a very magnificent object - it
-just contains the refcount and a pointer to the object's "type object". This is
-where the action is; the type object determines which (C) functions get called
-when, for instance, an attribute gets looked up on an object or it is multiplied
-by another object. These C functions are called "type methods".
+:c:type:`PyObject\*`, which serves as a "base type" for all Python objects.
+:c:type:`PyObject` itself only contains the refcount and a pointer to the
+object's "type object". This is where the action is; the type object determines
+which (C) functions get called when, for instance, an attribute gets looked
+up on an object or it is multiplied by another object. These C functions
+are called "type methods".
So, if you want to define a new object type, you need to create a new type
object.
@@ -50,15 +51,15 @@ The first bit that will be new is::
PyObject_HEAD
} noddy_NoddyObject;
-This is what a Noddy object will contain---in this case, nothing more than every
-Python object contains, namely a refcount and a pointer to a type object. These
-are the fields the ``PyObject_HEAD`` macro brings in. The reason for the macro
-is to standardize the layout and to enable special debugging fields in debug
-builds. Note that there is no semicolon after the ``PyObject_HEAD`` macro; one
-is included in the macro definition. Be wary of adding one by accident; it's
-easy to do from habit, and your compiler might not complain, but someone else's
-probably will! (On Windows, MSVC is known to call this an error and refuse to
-compile the code.)
+This is what a Noddy object will contain---in this case, nothing more than what
+every Python object contains---a refcount and a pointer to a type object.
+These are the fields the ``PyObject_HEAD`` macro brings in. The reason for the
+macro is to standardize the layout and to enable special debugging fields in
+debug builds. Note that there is no semicolon after the ``PyObject_HEAD``
+macro; one is included in the macro definition. Be wary of adding one by
+accident; it's easy to do from habit, and your compiler might not complain,
+but someone else's probably will! (On Windows, MSVC is known to call this an
+error and refuse to compile the code.)
For contrast, let's take a look at the corresponding definition for standard
Python floats::
@@ -224,7 +225,7 @@ doesn't do anything. It can't even be subclassed.
Adding data and methods to the Basic example
--------------------------------------------
-Let's expend the basic example to add some data and methods. Let's also make
+Let's extend the basic example to add some data and methods. Let's also make
the type usable as a base class. We'll create a new module, :mod:`noddy2` that
adds these capabilities:
@@ -288,18 +289,16 @@ strings, so we provide a new method::
self = (Noddy *)type->tp_alloc(type, 0);
if (self != NULL) {
self->first = PyString_FromString("");
- if (self->first == NULL)
- {
+ if (self->first == NULL) {
Py_DECREF(self);
return NULL;
- }
+ }
self->last = PyString_FromString("");
- if (self->last == NULL)
- {
+ if (self->last == NULL) {
Py_DECREF(self);
return NULL;
- }
+ }
self->number = 0;
}
@@ -327,8 +326,8 @@ any arguments passed when the type was called, and that returns the new object
created. New methods always accept positional and keyword arguments, but they
often ignore the arguments, leaving the argument handling to initializer
methods. Note that if the type supports subclassing, the type passed may not be
-the type being defined. The new method calls the tp_alloc slot to allocate
-memory. We don't fill the :attr:`tp_alloc` slot ourselves. Rather
+the type being defined. The new method calls the :attr:`tp_alloc` slot to
+allocate memory. We don't fill the :attr:`tp_alloc` slot ourselves. Rather
:c:func:`PyType_Ready` fills it for us by inheriting it from our base class,
which is :class:`object` by default. Most types use the default allocation.
@@ -445,15 +444,6 @@ concatenation of the first and last names. ::
static PyObject *
Noddy_name(Noddy* self)
{
- static PyObject *format = NULL;
- PyObject *args, *result;
-
- if (format == NULL) {
- format = PyString_FromString("%s %s");
- if (format == NULL)
- return NULL;
- }
-
if (self->first == NULL) {
PyErr_SetString(PyExc_AttributeError, "first");
return NULL;
@@ -464,20 +454,13 @@ concatenation of the first and last names. ::
return NULL;
}
- args = Py_BuildValue("OO", self->first, self->last);
- if (args == NULL)
- return NULL;
-
- result = PyString_Format(format, args);
- Py_DECREF(args);
-
- return result;
+ return PyUnicode_FromFormat("%S %S", self->first, self->last);
}
The method is implemented as a C function that takes a :class:`Noddy` (or
:class:`Noddy` subclass) instance as the first argument. Methods always take an
instance as the first argument. Methods often take positional and keyword
-arguments as well, but in this cased we don't take any and don't need to accept
+arguments as well, but in this case we don't take any and don't need to accept
a positional argument tuple or keyword argument dictionary. This method is
equivalent to the Python method::
@@ -1124,9 +1107,6 @@ needed for methods inherited from a base type. One additional entry is needed
at the end; it is a sentinel that marks the end of the array. The
:attr:`ml_name` field of the sentinel must be *NULL*.
-XXX Need to refer to some unified discussion of the structure fields, shared
-with the next section.
-
The second table is used to define attributes which map directly to data stored
in the instance. A variety of primitive C types are supported, and access may
be read-only or read-write. The structures in the table are defined as::
@@ -1146,8 +1126,6 @@ type which will be able to extract a value from the instance structure. The
convert Python values to and from C values. The :attr:`flags` field is used to
store flags which control how the attribute can be accessed.
-XXX Need to move some of this to a shared section!
-
The following flag constants are defined in :file:`structmember.h`; they may be
combined using bitwise-OR.
@@ -1372,7 +1350,7 @@ Here is a desultory example of the implementation of the call function. ::
return result;
}
-XXX some fields need to be added here... ::
+::
/* Iterators */
getiterfunc tp_iter;
diff --git a/Doc/faq/design.rst b/Doc/faq/design.rst
index 7c5116d..6b8a8fd 100644
--- a/Doc/faq/design.rst
+++ b/Doc/faq/design.rst
@@ -515,14 +515,16 @@ far) under most circumstances, and the implementation is simpler.
Dictionaries work by computing a hash code for each key stored in the dictionary
using the :func:`hash` built-in function. The hash code varies widely depending
-on the key; for example, "Python" hashes to -539294296 while "python", a string
-that differs by a single bit, hashes to 1142331976. The hash code is then used
-to calculate a location in an internal array where the value will be stored.
-Assuming that you're storing keys that all have different hash values, this
-means that dictionaries take constant time -- O(1), in computer science notation
--- to retrieve a key. It also means that no sorted order of the keys is
-maintained, and traversing the array as the ``.keys()`` and ``.items()`` do will
-output the dictionary's content in some arbitrary jumbled order.
+on the key and a per-process seed; for example, "Python" could hash to
+-539294296 while "python", a string that differs by a single bit, could hash
+to 1142331976. The hash code is then used to calculate a location in an
+internal array where the value will be stored. Assuming that you're storing
+keys that all have different hash values, this means that dictionaries take
+constant time -- O(1), in computer science notation -- to retrieve a key. It
+also means that no sorted order of the keys is maintained, and traversing the
+array as the ``.keys()`` and ``.items()`` do will output the dictionary's
+content in some arbitrary jumbled order that can change with every invocation of
+a program.
Why must dictionary keys be immutable?
@@ -634,7 +636,7 @@ construction of large programs.
Python 2.6 adds an :mod:`abc` module that lets you define Abstract Base Classes
(ABCs). You can then use :func:`isinstance` and :func:`issubclass` to check
whether an instance or a class implements a particular ABC. The
-:mod:`collections` module defines a set of useful ABCs such as
+:mod:`collections.abc` module defines a set of useful ABCs such as
:class:`Iterable`, :class:`Container`, and :class:`MutableMapping`.
For Python, many of the advantages of interface specifications can be obtained
diff --git a/Doc/faq/extending.rst b/Doc/faq/extending.rst
index 7c684a0..fa245c7 100644
--- a/Doc/faq/extending.rst
+++ b/Doc/faq/extending.rst
@@ -445,34 +445,3 @@ In Python 2.2, you can inherit from built-in classes such as :class:`int`,
The Boost Python Library (BPL, http://www.boost.org/libs/python/doc/index.html)
provides a way of doing this from C++ (i.e. you can inherit from an extension
class written in C++ using the BPL).
-
-
-When importing module X, why do I get "undefined symbol: PyUnicodeUCS2*"?
--------------------------------------------------------------------------
-
-You are using a version of Python that uses a 4-byte representation for Unicode
-characters, but some C extension module you are importing was compiled using a
-Python that uses a 2-byte representation for Unicode characters (the default).
-
-If instead the name of the undefined symbol starts with ``PyUnicodeUCS4``, the
-problem is the reverse: Python was built using 2-byte Unicode characters, and
-the extension module was compiled using a Python with 4-byte Unicode characters.
-
-This can easily occur when using pre-built extension packages. RedHat Linux
-7.x, in particular, provided a "python2" binary that is compiled with 4-byte
-Unicode. This only causes the link failure if the extension uses any of the
-``PyUnicode_*()`` functions. It is also a problem if an extension uses any of
-the Unicode-related format specifiers for :c:func:`Py_BuildValue` (or similar) or
-parameter specifications for :c:func:`PyArg_ParseTuple`.
-
-You can check the size of the Unicode character a Python interpreter is using by
-checking the value of sys.maxunicode:
-
- >>> import sys
- >>> if sys.maxunicode > 65535:
- ... print('UCS4 build')
- ... else:
- ... print('UCS2 build')
-
-The only way to solve this problem is to use extension modules compiled with a
-Python binary built using the same size for Unicode characters.
diff --git a/Doc/faq/library.rst b/Doc/faq/library.rst
index 7385c59..cab2d7b 100644
--- a/Doc/faq/library.rst
+++ b/Doc/faq/library.rst
@@ -351,7 +351,7 @@ When run, this will produce the following output:
Worker <Thread(worker 1, started 130283832797456)> running with argument 5
...
-Consult the module's documentation for more details; the :class:`~queue.Queue``
+Consult the module's documentation for more details; the :class:`~queue.Queue`
class provides a featureful interface.
diff --git a/Doc/glossary.rst b/Doc/glossary.rst
index 3c11de7..35bdb96 100644
--- a/Doc/glossary.rst
+++ b/Doc/glossary.rst
@@ -34,7 +34,7 @@ Glossary
subclasses, which are classes that don't inherit from a class but are
still recognized by :func:`isinstance` and :func:`issubclass`; see the
:mod:`abc` module documentation. Python comes with many built-in ABCs for
- data structures (in the :mod:`collections` module), numbers (in the
+ data structures (in the :mod:`collections.abc` module), numbers (in the
:mod:`numbers` module), streams (in the :mod:`io` module), import finders
and loaders (in the :mod:`importlib.abc` module). You can create your own
ABCs with the :mod:`abc` module.
@@ -209,9 +209,9 @@ Glossary
finder
An object that tries to find the :term:`loader` for a module. It must
- implement a method named :meth:`find_module`. See :pep:`302` for
- details and :class:`importlib.abc.Finder` for an
- :term:`abstract base class`.
+ implement either a method named :meth:`find_loader` or a method named
+ :meth:`find_module`. See :pep:`302` and :pep:`420` for details and
+ :class:`importlib.abc.Finder` for an :term:`abstract base class`.
floor division
Mathematical division that rounds down to nearest integer. The floor
@@ -315,6 +315,17 @@ Glossary
role in places where a constant hash value is needed, for example as a key
in a dictionary.
+ import path
+ A list of locations (or :term:`path entries <path entry>`) that are
+ searched by the :term:`path based finder` for modules to import. During
+ import, this list of locations usually comes from :data:`sys.path`, but
+ for subpackages it may also come from the parent package's ``__path__``
+ attribute.
+
+ importing
+ The process by which Python code in one module is made available to
+ Python code in another module.
+
importer
An object that both finds and loads a module; both a
:term:`finder` and :term:`loader` object.
@@ -434,12 +445,17 @@ Glossary
mapping
A container object that supports arbitrary key lookups and implements the
- methods specified in the :class:`~collections.Mapping` or
- :class:`~collections.MutableMapping`
+ methods specified in the :class:`~collections.abc.Mapping` or
+ :class:`~collections.abc.MutableMapping`
:ref:`abstract base classes <collections-abstract-base-classes>`. Examples
include :class:`dict`, :class:`collections.defaultdict`,
:class:`collections.OrderedDict` and :class:`collections.Counter`.
+ meta path finder
+ A finder returned by a search of :data:`sys.meta_path`. Meta path
+ finders are related to, but different from, :term:`path entry finders
+ <path entry finder>`.
+
metaclass
The class of a class. Class definitions create a class name, a class
dictionary, and a list of base classes. The metaclass is responsible for
@@ -464,6 +480,11 @@ Glossary
for a member during lookup. See `The Python 2.3 Method Resolution Order
<http://www.python.org/download/releases/2.3/mro/>`_.
+ module
+ An object that serves as an organizational unit of Python code. Modules
+ have a namespace containing arbitrary Python objects. Modules are loaded
+ into Python by the process of :term:`importing`.
+
MRO
See :term:`method resolution order`.
@@ -496,6 +517,12 @@ Glossary
functions are implemented by the :mod:`random` and :mod:`itertools`
modules, respectively.
+ namespace package
+ A :pep:`420` :term:`package` which serves only as a container for
+ subpackages. Namespace packages may have no physical representation,
+ and specifically are not like a :term:`regular package` because they
+ have no ``__init__.py`` file.
+
nested scope
The ability to refer to a variable in an enclosing definition. For
instance, a function defined inside another function can refer to
@@ -516,6 +543,33 @@ Glossary
(methods). Also the ultimate base class of any :term:`new-style
class`.
+ package
+ A Python module which can contain submodules or, recursively,
+ subpackages. Technically, a package is a Python module with an
+ ``__path__`` attribute.
+
+ path entry
+ A single location on the :term:`import path` which the :term:`path
+ based finder` consults to find modules for importing.
+
+ path entry finder
+ A :term:`finder` returned by a callable on :data:`sys.path_hooks`
+ (i.e. a :term:`path entry hook`) which knows how to locate modules given
+ a :term:`path entry`.
+
+ path entry hook
+ A callable on the :data:`sys.path_hooks` list which returns a :term:`path
+ entry finder` if it knows how to find modules on a specific :term:`path
+ entry`.
+
+ path based finder
+ One of the default :term:`meta path finders <meta path finder>` which
+ searches an :term:`import path` for modules.
+
+ portion
+ A set of files in a single directory (possibly stored in a zip file)
+ that contribute to a namespace package, as defined in :pep:`420`.
+
positional argument
The arguments assigned to local names inside a function or method,
determined by the order in which they were given in the call. ``*`` is
@@ -523,9 +577,23 @@ Glossary
definition), or pass several arguments as a list to a function. See
:term:`argument`.
+ provisional package
+ A provisional package is one which has been deliberately excluded from
+ the standard library's backwards compatibility guarantees. While major
+ changes to such packages are not expected, as long as they are marked
+ provisional, backwards incompatible changes (up to and including removal
+ of the package) may occur if deemed necessary by core developers. Such
+ changes will not be made gratuitously -- they will occur only if serious
+ flaws are uncovered that were missed prior to the inclusion of the
+ package.
+
+ This process allows the standard library to continue to evolve over
+ time, without locking in problematic design errors for extended periods
+ of time. See :pep:`411` for more details.
+
Python 3000
- Nickname for the Python 3.x release line (coined long ago when the release
- of version 3 was something in the distant future.) This is also
+ Nickname for the Python 3.x release line (coined long ago when the
+ release of version 3 was something in the distant future.) This is also
abbreviated "Py3k".
Pythonic
@@ -544,6 +612,32 @@ Glossary
for piece in food:
print(piece)
+ qualified name
+ A dotted name showing the "path" from a module's global scope to a
+ class, function or method defined in that module, as defined in
+ :pep:`3155`. For top-level functions and classes, the qualified name
+ is the same as the object's name::
+
+ >>> class C:
+ ... class D:
+ ... def meth(self):
+ ... pass
+ ...
+ >>> C.__qualname__
+ 'C'
+ >>> C.D.__qualname__
+ 'C.D'
+ >>> C.D.meth.__qualname__
+ 'C.D.meth'
+
+ When used to refer to modules, the *fully qualified name* means the
+ entire dotted path to the module, including any parent packages,
+ e.g. ``email.mime.text``::
+
+ >>> import email.mime.text
+ >>> email.mime.text.__name__
+ 'email.mime.text'
+
reference count
The number of references to an object. When the reference count of an
object drops to zero, it is deallocated. Reference counting is
@@ -552,6 +646,10 @@ Glossary
:func:`~sys.getrefcount` function that programmers can call to return the
reference count for a particular object.
+ regular package
+ A traditional :term:`package`, such as a directory containing an
+ ``__init__.py`` file.
+
__slots__
A declaration inside a class that saves memory by pre-declaring space for
instance attributes and eliminating instance dictionaries. Though
@@ -586,6 +684,14 @@ Glossary
an :term:`expression` or one of several constructs with a keyword, such
as :keyword:`if`, :keyword:`while` or :keyword:`for`.
+ struct sequence
+ A tuple with named elements. Struct sequences expose an interface similar
+ to :term:`named tuple` in that elements can be accessed either by
+ index or as an attribute. However, they do not have any of the named tuple
+ methods like :meth:`~collections.somenamedtuple._make` or
+ :meth:`~collections.somenamedtuple._asdict`. Examples of struct sequences
+ include :data:`sys.float_info` and the return value of :func:`os.stat`.
+
triple-quoted string
A string which is bound by three instances of either a quotation mark
(") or an apostrophe ('). While they don't provide any functionality
diff --git a/Doc/howto/descriptor.rst b/Doc/howto/descriptor.rst
index 1616f67..0b513f9 100644
--- a/Doc/howto/descriptor.rst
+++ b/Doc/howto/descriptor.rst
@@ -36,9 +36,7 @@ continuing through the base classes of ``type(a)`` excluding metaclasses. If the
looked-up value is an object defining one of the descriptor methods, then Python
may override the default behavior and invoke the descriptor method instead.
Where this occurs in the precedence chain depends on which descriptor methods
-were defined. Note that descriptors are only invoked for new style objects or
-classes (a class is new style if it inherits from :class:`object` or
-:class:`type`).
+were defined.
Descriptors are a powerful, general purpose protocol. They are the mechanism
behind properties, methods, static methods, class methods, and :func:`super()`.
@@ -89,8 +87,6 @@ of ``obj``. If ``d`` defines the method :meth:`__get__`, then ``d.__get__(obj)`
is invoked according to the precedence rules listed below.
The details of invocation depend on whether ``obj`` is an object or a class.
-Either way, descriptors only work for new style objects and classes. A class is
-new style if it is a subclass of :class:`object`.
For objects, the machinery is in :meth:`object.__getattribute__` which
transforms ``b.x`` into ``type(b).__dict__['x'].__get__(b, type(b))``. The
@@ -115,7 +111,6 @@ The important points to remember are:
* descriptors are invoked by the :meth:`__getattribute__` method
* overriding :meth:`__getattribute__` prevents automatic descriptor calls
-* :meth:`__getattribute__` is only available with new style classes and objects
* :meth:`object.__getattribute__` and :meth:`type.__getattribute__` make
different calls to :meth:`__get__`.
* data descriptors always override instance dictionaries.
@@ -128,10 +123,7 @@ and then returns ``A.__dict__['m'].__get__(obj, A)``. If not a descriptor,
``m`` is returned unchanged. If not in the dictionary, ``m`` reverts to a
search using :meth:`object.__getattribute__`.
-Note, in Python 2.2, ``super(B, obj).m()`` would only invoke :meth:`__get__` if
-``m`` was a data descriptor. In Python 2.3, non-data descriptors also get
-invoked unless an old-style class is involved. The implementation details are
-in :c:func:`super_getattro()` in
+The implementation details are in :c:func:`super_getattro()` in
`Objects/typeobject.c <http://svn.python.org/view/python/trunk/Objects/typeobject.c?view=markup>`_
and a pure Python equivalent can be found in `Guido's Tutorial`_.
diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst
index ebbb229..b621a84 100644
--- a/Doc/howto/functional.rst
+++ b/Doc/howto/functional.rst
@@ -292,13 +292,14 @@ ordering of the objects in the dictionary.
Applying :func:`iter` to a dictionary always loops over the keys, but
dictionaries have methods that return other iterators. If you want to iterate
over values or key/value pairs, you can explicitly call the
-:meth:`~dict.values` or :meth:`~dict.items` methods to get an appropriate iterator.
+:meth:`~dict.values` or :meth:`~dict.items` methods to get an appropriate
+iterator.
The :func:`dict` constructor can accept an iterator that returns a finite stream
of ``(key, value)`` tuples:
>>> L = [('Italy', 'Rome'), ('France', 'Paris'), ('US', 'Washington DC')]
- >>> dict(iter(L))
+ >>> dict(iter(L)) #doctest: +SKIP
{'Italy': 'Rome', 'US': 'Washington DC', 'France': 'Paris'}
Files also support iteration by calling the :meth:`~io.TextIOBase.readline`
diff --git a/Doc/howto/index.rst b/Doc/howto/index.rst
index a11d3da..f44e8c0 100644
--- a/Doc/howto/index.rst
+++ b/Doc/howto/index.rst
@@ -28,4 +28,5 @@ Currently, the HOWTOs are:
urllib2.rst
webservers.rst
argparse.rst
+ ipaddress.rst
diff --git a/Doc/howto/ipaddress.rst b/Doc/howto/ipaddress.rst
new file mode 100644
index 0000000..5e0ff3e
--- /dev/null
+++ b/Doc/howto/ipaddress.rst
@@ -0,0 +1,341 @@
+.. _ipaddress-howto:
+
+***************************************
+An introduction to the ipaddress module
+***************************************
+
+:author: Peter Moody
+:author: Nick Coghlan
+
+.. topic:: Overview
+
+ This document aims to provide a gentle introduction to the
+ :mod:`ipaddress` module. It is aimed primarily at users that aren't
+ already familiar with IP networking terminology, but may also be useful
+ to network engineers wanting an overview of how :mod:`ipaddress`
+ represents IP network addressing concepts.
+
+
+Creating Address/Network/Interface objects
+==========================================
+
+Since :mod:`ipaddress` is a module for inspecting and manipulating IP addresses,
+the first thing you'll want to do is create some objects. You can use
+:mod:`ipaddress` to create objects from strings and integers.
+
+
+A Note on IP Versions
+---------------------
+
+For readers that aren't particularly familiar with IP addressing, it's
+important to know that the Internet Protocol is currently in the process
+of moving from version 4 of the protocol to version 6. This transition is
+occurring largely because version 4 of the protocol doesn't provide enough
+addresses to handle the needs of the whole world, especially given the
+increasing number of devices with direct connections to the internet.
+
+Explaining the details of the differences between the two versions of the
+protocol is beyond the scope of this introduction, but readers need to at
+least be aware that these two versions exist, and it will sometimes be
+necessary to force the use of one version or the other.
+
+
+IP Host Addresses
+-----------------
+
+Addresses, often referred to as "host addresses", are the most basic unit
+when working with IP addressing. The simplest way to create addresses is
+to use the :func:`ipaddress.ip_address` factory function, which automatically
+determines whether to create an IPv4 or IPv6 address based on the passed in
+value:
+
+.. testsetup::
+ >>> import ipaddress
+
+::
+
+ >>> ipaddress.ip_address('192.0.2.1')
+ IPv4Address('192.0.2.1')
+ >>> ipaddress.ip_address('2001:DB8::1')
+ IPv6Address('2001:db8::1')
+
+Addresses can also be created directly from integers. Values that will
+fit within 32 bits are assumed to be IPv4 addresses::
+
+ >>> ipaddress.ip_address(3221225985)
+ IPv4Address('192.0.2.1')
+ >>> ipaddress.ip_address(42540766411282592856903984951653826561)
+ IPv6Address('2001:db8::1')
+
+To force the use of IPv4 or IPv6 addresses, the relevant classes can be
+invoked directly. This is particularly useful to force creation of IPv6
+addresses for small integers::
+
+ >>> ipaddress.ip_address(1)
+ IPv4Address('0.0.0.1')
+ >>> ipaddress.IPv4Address(1)
+ IPv4Address('0.0.0.1')
+ >>> ipaddress.IPv6Address(1)
+ IPv6Address('::1')
+
+
+Defining Networks
+-----------------
+
+Host addresses are usually grouped together into IP networks, so
+:mod:`ipaddress` provides a way to create, inspect and manipulate network
+definitions. IP network objects are constructed from strings that define the
+range of host addresses that are part of that network. The simplest form
+for that information is a "network address/network prefix" pair, where the
+prefix defines the number of leading bits that are compared to determine
+whether or not an address is part of the network and the network address
+defines the expected value of those bits.
+
+As for addresses, a factory function is provided that determines the correct
+IP version automatically::
+
+ >>> ipaddress.ip_network('192.0.2.0/24')
+ IPv4Network('192.0.2.0/24')
+ >>> ipaddress.ip_network('2001:db8::0/96')
+ IPv6Network('2001:db8::/96')
+
+Network objects cannot have any host bits set. The practical effect of this
+is that ``192.0.2.1/24`` does not describe a network. Such definitions are
+referred to as interface objects since the ip-on-a-network notation is
+commonly used to describe network interfaces of a computer on a given network
+and are described further in the next section.
+
+By default, attempting to create a network object with host bits set will
+result in :exc:`ValueError` being raised. To request that the
+additional bits instead be coerced to zero, the flag ``strict=False`` can
+be passed to the constructor::
+
+ >>> ipaddress.ip_network('192.0.2.1/24')
+ Traceback (most recent call last):
+ ...
+ ValueError: 192.0.2.1/24 has host bits set
+ >>> ipaddress.ip_network('192.0.2.1/24', strict=False)
+ IPv4Network('192.0.2.0/24')
+
+While the string form offers significantly more flexibility, networks can
+also be defined with integers, just like host addresses. In this case, the
+network is considered to contain only the single address identified by the
+integer, so the network prefix includes the entire network address::
+
+ >>> ipaddress.ip_network(3221225984)
+ IPv4Network('192.0.2.0/32')
+ >>> ipaddress.ip_network(42540766411282592856903984951653826560)
+ IPv6Network('2001:db8::/128')
+
+As with addresses, creation of a particular kind of network can be forced
+by calling the class constructor directly instead of using the factory
+function.
+
+
+Host Interfaces
+---------------
+
+As mentioned just above, if you need to describe an address on a particular
+network, neither the address nor the network classes are sufficient.
+Notation like ``192.0.2.1/24`` is commonly used by network engineers and the
+people who write tools for firewalls and routers as shorthand for "the host
+``192.0.2.1`` on the network ``192.0.2.0/24``". Accordingly, :mod:`ipaddress`
+provides a set of hybrid classes that associate an address with a particular
+network. The interface for creation is identical to that for defining network
+objects, except that the address portion isn't constrained to being a network
+address.
+
+ >>> ipaddress.ip_interface('192.0.2.1/24')
+ IPv4Interface('192.0.2.1/24')
+ >>> ipaddress.ip_interface('2001:db8::1/96')
+ IPv6Interface('2001:db8::1/96')
+
+Integer inputs are accepted (as with networks), and use of a particular IP
+version can be forced by calling the relevant constructor directly.
+
+
+Inspecting Address/Network/Interface Objects
+============================================
+
+You've gone to the trouble of creating an IPv(4|6)(Address|Network|Interface)
+object, so you probably want to get information about it. :mod:`ipaddress`
+tries to make doing this easy and intuitive.
+
+Extracting the IP version::
+
+ >>> addr4 = ipaddress.ip_address('192.0.2.1')
+ >>> addr6 = ipaddress.ip_address('2001:db8::1')
+ >>> addr6.version
+ 6
+ >>> addr4.version
+ 4
+
+Obtaining the network from an interface::
+
+ >>> host4 = ipaddress.ip_interface('192.0.2.1/24')
+ >>> host4.network
+ IPv4Network('192.0.2.0/24')
+ >>> host6 = ipaddress.ip_interface('2001:db8::1/96')
+ >>> host6.network
+ IPv6Network('2001:db8::/96')
+
+Finding out how many individual addresses are in a network::
+
+ >>> net4 = ipaddress.ip_network('192.0.2.0/24')
+ >>> net4.num_addresses
+ 256
+ >>> net6 = ipaddress.ip_network('2001:db8::0/96')
+ >>> net6.num_addresses
+ 4294967296
+
+Iterating through the "usable" addresses on a network::
+
+ >>> net4 = ipaddress.ip_network('192.0.2.0/24')
+ >>> for x in net4.hosts():
+ ... print(x) # doctest: +ELLIPSIS
+ 192.0.2.1
+ 192.0.2.2
+ 192.0.2.3
+ 192.0.2.4
+ ...
+ 192.0.2.252
+ 192.0.2.253
+ 192.0.2.254
+
+
+Obtaining the netmask (i.e. set bits corresponding to the network prefix) or
+the hostmask (any bits that are not part of the netmask)::
+
+ >>> net4 = ipaddress.ip_network('192.0.2.0/24')
+ >>> net4.netmask
+ IPv4Address('255.255.255.0')
+ >>> net4.hostmask
+ IPv4Address('0.0.0.255')
+ >>> net6 = ipaddress.ip_network('2001:db8::0/96')
+ >>> net6.netmask
+ IPv6Address('ffff:ffff:ffff:ffff:ffff:ffff::')
+ >>> net6.hostmask
+ IPv6Address('::ffff:ffff')
+
+
+Exploding or compressing the address::
+
+ >>> addr6.exploded
+ '2001:0db8:0000:0000:0000:0000:0000:0001'
+ >>> addr6.compressed
+ '2001:db8::1'
+ >>> net6.exploded
+ '2001:0db8:0000:0000:0000:0000:0000:0000/96'
+ >>> net6.compressed
+ '2001:db8::/96'
+
+While IPv4 doesn't support explosion or compression, the associated objects
+still provide the relevant properties so that version neutral code can
+easily ensure the most concise or most verbose form is used for IPv6
+addresses while still correctly handling IPv4 addresses.
+
+
+Networks as lists of Addresses
+==============================
+
+It's sometimes useful to treat networks as lists. This means it is possible
+to index them like this::
+
+ >>> net4[1]
+ IPv4Address('192.0.2.1')
+ >>> net4[-1]
+ IPv4Address('192.0.2.255')
+ >>> net6[1]
+ IPv6Address('2001:db8::1')
+ >>> net6[-1]
+ IPv6Address('2001:db8::ffff:ffff')
+
+
+It also means that network objects lend themselves to using the list
+membership test syntax like this::
+
+ if address in network:
+ # do something
+
+Containment testing is done efficiently based on the network prefix::
+
+ >>> addr4 = ipaddress.ip_address('192.0.2.1')
+ >>> addr4 in ipaddress.ip_network('192.0.2.0/24')
+ True
+ >>> addr4 in ipaddress.ip_network('192.0.3.0/24')
+ False
+
+
+Comparisons
+===========
+
+:mod:`ipaddress` provides some simple, hopefully intuitive ways to compare
+objects, where it makes sense::
+
+ >>> ipaddress.ip_address('192.0.2.1') < ipaddress.ip_address('192.0.2.2')
+ True
+
+A :exc:`TypeError` exception is raised if you try to compare objects of
+different versions or different types.
+
+
+Using IP Addresses with other modules
+=====================================
+
+Other modules that use IP addresses (such as :mod:`socket`) usually won't
+accept objects from this module directly. Instead, they must be coerced to
+an integer or string that the other module will accept::
+
+ >>> addr4 = ipaddress.ip_address('192.0.2.1')
+ >>> str(addr4)
+ '192.0.2.1'
+ >>> int(addr4)
+ 3221225985
+
+
+Getting more detail when instance creation fails
+================================================
+
+When creating address/network/interface objects using the version-agnostic
+factory functions, any errors will be reported as :exc:`ValueError` with
+a generic error message that simply says the passed in value was not
+recognized as an object of that type. The lack of a specific error is
+because it's necessary to know whether the value is *supposed* to be IPv4
+or IPv6 in order to provide more detail on why it has been rejected.
+
+To support use cases where it is useful to have access to this additional
+detail, the individual class constructors actually raise the
+:exc:`ValueError` subclasses :exc:`ipaddress.AddressValueError` and
+:exc:`ipaddress.NetmaskValueError` to indicate exactly which part of
+the definition failed to parse correctly.
+
+The error messages are significantly more detailed when using the
+class constructors directly. For example::
+
+ >>> ipaddress.ip_address("192.168.0.256")
+ Traceback (most recent call last):
+ ...
+ ValueError: '192.168.0.256' does not appear to be an IPv4 or IPv6 address
+ >>> ipaddress.IPv4Address("192.168.0.256")
+ Traceback (most recent call last):
+ ...
+ ipaddress.AddressValueError: Octet 256 (> 255) not permitted in '192.168.0.256'
+
+ >>> ipaddress.ip_network("192.168.0.1/64")
+ Traceback (most recent call last):
+ ...
+ ValueError: '192.168.0.1/64' does not appear to be an IPv4 or IPv6 network
+ >>> ipaddress.IPv4Network("192.168.0.1/64")
+ Traceback (most recent call last):
+ ...
+ ipaddress.NetmaskValueError: '64' is not a valid netmask
+
+However, both of the module specific exceptions have :exc:`ValueError` as their
+parent class, so if you're not concerned with the particular type of error,
+you can still write code like the following::
+
+ try:
+ network = ipaddress.IPv4Network(address)
+ except ValueError:
+ print('address/netmask is invalid for IPv4:', address)
+
diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst
index 370c757..92af0ec 100644
--- a/Doc/howto/logging-cookbook.rst
+++ b/Doc/howto/logging-cookbook.rst
@@ -1316,6 +1316,33 @@ For more information about this configuration, you can see the `relevant
section <https://docs.djangoproject.com/en/1.3/topics/logging/#configuring-logging>`_
of the Django documentation.
+.. _cookbook-rotator-namer:
+
+Using a rotator and namer to customise log rotation processing
+--------------------------------------------------------------
+
+An example of how you can define a namer and rotator is given in the following
+snippet, which shows zlib-based compression of the log file::
+
+ def namer(name):
+ return name + ".gz"
+
+ def rotator(source, dest):
+ with open(source, "rb") as sf:
+ data = sf.read()
+ compressed = zlib.compress(data, 9)
+ with open(dest, "wb") as df:
+ df.write(compressed)
+ os.remove(source)
+
+ rh = logging.handlers.RotatingFileHandler(...)
+ rh.rotator = rotator
+ rh.namer = namer
+
+These are not "true" .gz files, as they are bare compressed data, with no
+"container" such as you’d find in an actual gzip file. This snippet is just
+for illustration purposes.
+
A more elaborate multiprocessing example
----------------------------------------
@@ -1572,7 +1599,7 @@ UTF-8, then you need to do the following:
'ASCII section\ufeffUnicode section'
- The Unicode code point ``'\feff```, when encoded using UTF-8, will be
+ The Unicode code point ``'\ufeff'``, when encoded using UTF-8, will be
encoded as a UTF-8 BOM -- the byte-string ``b'\xef\xbb\xbf'``.
#. Replace the ASCII section with whatever placeholders you like, but make sure
diff --git a/Doc/howto/sockets.rst b/Doc/howto/sockets.rst
index 279bb3e..ca6528b 100644
--- a/Doc/howto/sockets.rst
+++ b/Doc/howto/sockets.rst
@@ -25,8 +25,8 @@ It's not really a tutorial - you'll still have work to do in getting things
working. It doesn't cover the fine points (and there are a lot of them), but I
hope it will give you enough background to begin using them decently.
-I'm only going to talk about INET sockets, but they account for at least 99% of
-the sockets in use. And I'll only talk about STREAM sockets - unless you really
+I'm only going to talk about INET (i.e. IPv4) sockets, but they account for at least 99% of
+the sockets in use. And I'll only talk about STREAM (i.e. TCP) sockets - unless you really
know what you're doing (in which case this HOWTO isn't for you!), you'll get
better behavior and performance from a STREAM socket than anything else. I will
try to clear up the mystery of what a socket is, as well as some hints on how to
@@ -208,10 +208,10 @@ length message::
totalsent = totalsent + sent
def myreceive(self):
- msg = ''
+ msg = b''
while len(msg) < MSGLEN:
chunk = self.sock.recv(MSGLEN-len(msg))
- if chunk == '':
+ if chunk == b'':
raise RuntimeError("socket connection broken")
msg = msg + chunk
return msg
@@ -371,12 +371,6 @@ have created a new socket to ``connect`` to someone else, put it in the
potential_writers list. If it shows up in the writable list, you have a decent
chance that it has connected.
-One very nasty problem with ``select``: if somewhere in those input lists of
-sockets is one which has died a nasty death, the ``select`` will fail. You then
-need to loop through every single damn socket in all those lists and do a
-``select([sock],[],[],0)`` until you find the bad one. That timeout of 0 means
-it won't take long, but it's ugly.
-
Actually, ``select`` can be handy even with blocking sockets. It's one way of
determining whether you will block - the socket returns as readable when there's
something in the buffers. However, this still doesn't help with the problem of
@@ -386,26 +380,6 @@ determining whether the other end is done, or just busy with something else.
files. Don't try this on Windows. On Windows, ``select`` works with sockets
only. Also note that in C, many of the more advanced socket options are done
differently on Windows. In fact, on Windows I usually use threads (which work
-very, very well) with my sockets. Face it, if you want any kind of performance,
-your code will look very different on Windows than on Unix.
-
-
-Performance
------------
+very, very well) with my sockets.
-There's no question that the fastest sockets code uses non-blocking sockets and
-select to multiplex them. You can put together something that will saturate a
-LAN connection without putting any strain on the CPU.
-
-The trouble is that an app written this way can't do much of anything else -
-it needs to be ready to shuffle bytes around at all times. Assuming that your
-app is actually supposed to do something more than that, threading is the
-optimal solution, (and using non-blocking sockets will be faster than using
-blocking sockets).
-
-Finally, remember that even though blocking sockets are somewhat slower than
-non-blocking, in many cases they are the "right" solution. After all, if your
-app is driven by the data it receives over a socket, there's not much sense in
-complicating the logic just so your app can wait on ``select`` instead of
-``recv``.
diff --git a/Doc/howto/unicode.rst b/Doc/howto/unicode.rst
index f9eeae4..5d9e027 100644
--- a/Doc/howto/unicode.rst
+++ b/Doc/howto/unicode.rst
@@ -414,7 +414,7 @@ References
----------
The ``str`` type is described in the Python library reference at
-:ref:`typesseq`.
+:ref:`textseq`.
The documentation for the :mod:`unicodedata` module.
diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst
index 87f42ba..955e455 100644
--- a/Doc/howto/urllib2.rst
+++ b/Doc/howto/urllib2.rst
@@ -56,6 +56,13 @@ The simplest way to use urllib.request is as follows::
response = urllib.request.urlopen('http://python.org/')
html = response.read()
+If you wish to retrieve a resource via URL and store it in a temporary location,
+you can do so via the :func:`urlretrieve` function::
+
+ import urllib.request
+ local_filename, headers = urllib.request.urlretrieve('http://python.org/')
+ html = open(local_filename)
+
Many uses of urllib will be that simple (note that instead of an 'http:' URL we
could have used an URL starting with 'ftp:', 'file:', etc.). However, it's the
purpose of this tutorial to explain the more complicated cases, concentrating on
diff --git a/Doc/includes/mp_benchmarks.py b/Doc/includes/mp_benchmarks.py
index acdf642..3763ea9 100644
--- a/Doc/includes/mp_benchmarks.py
+++ b/Doc/includes/mp_benchmarks.py
@@ -6,16 +6,12 @@
#
import time
-import sys
import multiprocessing
import threading
import queue
import gc
-if sys.platform == 'win32':
- _timer = time.clock
-else:
- _timer = time.time
+_timer = time.perf_counter
delta = 1
diff --git a/Doc/includes/noddy2.c b/Doc/includes/noddy2.c
index 9b8eafb..9641558 100644
--- a/Doc/includes/noddy2.c
+++ b/Doc/includes/noddy2.c
@@ -24,18 +24,16 @@ Noddy_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self = (Noddy *)type->tp_alloc(type, 0);
if (self != NULL) {
self->first = PyUnicode_FromString("");
- if (self->first == NULL)
- {
+ if (self->first == NULL) {
Py_DECREF(self);
return NULL;
- }
-
+ }
+
self->last = PyUnicode_FromString("");
- if (self->last == NULL)
- {
+ if (self->last == NULL) {
Py_DECREF(self);
return NULL;
- }
+ }
self->number = 0;
}
@@ -50,10 +48,10 @@ Noddy_init(Noddy *self, PyObject *args, PyObject *kwds)
static char *kwlist[] = {"first", "last", "number", NULL};
- if (! PyArg_ParseTupleAndKeywords(args, kwds, "|OOi", kwlist,
- &first, &last,
+ if (! PyArg_ParseTupleAndKeywords(args, kwds, "|OOi", kwlist,
+ &first, &last,
&self->number))
- return -1;
+ return -1;
if (first) {
tmp = self->first;
@@ -86,15 +84,6 @@ static PyMemberDef Noddy_members[] = {
static PyObject *
Noddy_name(Noddy* self)
{
- static PyObject *format = NULL;
- PyObject *args, *result;
-
- if (format == NULL) {
- format = PyUnicode_FromString("%s %s");
- if (format == NULL)
- return NULL;
- }
-
if (self->first == NULL) {
PyErr_SetString(PyExc_AttributeError, "first");
return NULL;
@@ -105,14 +94,7 @@ Noddy_name(Noddy* self)
return NULL;
}
- args = Py_BuildValue("OO", self->first, self->last);
- if (args == NULL)
- return NULL;
-
- result = PyUnicode_Format(format, args);
- Py_DECREF(args);
-
- return result;
+ return PyUnicode_FromFormat("%S %S", self->first, self->last);
}
static PyMethodDef Noddy_methods[] = {
@@ -145,12 +127,12 @@ static PyTypeObject NoddyType = {
Py_TPFLAGS_DEFAULT |
Py_TPFLAGS_BASETYPE, /* tp_flags */
"Noddy objects", /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
Noddy_methods, /* tp_methods */
Noddy_members, /* tp_members */
0, /* tp_getset */
@@ -173,7 +155,7 @@ static PyModuleDef noddy2module = {
};
PyMODINIT_FUNC
-PyInit_noddy2(void)
+PyInit_noddy2(void)
{
PyObject* m;
diff --git a/Doc/includes/noddy3.c b/Doc/includes/noddy3.c
index 89f3a77..8a5a753 100644
--- a/Doc/includes/noddy3.c
+++ b/Doc/includes/noddy3.c
@@ -24,18 +24,16 @@ Noddy_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self = (Noddy *)type->tp_alloc(type, 0);
if (self != NULL) {
self->first = PyUnicode_FromString("");
- if (self->first == NULL)
- {
+ if (self->first == NULL) {
Py_DECREF(self);
return NULL;
- }
-
+ }
+
self->last = PyUnicode_FromString("");
- if (self->last == NULL)
- {
+ if (self->last == NULL) {
Py_DECREF(self);
return NULL;
- }
+ }
self->number = 0;
}
@@ -50,10 +48,10 @@ Noddy_init(Noddy *self, PyObject *args, PyObject *kwds)
static char *kwlist[] = {"first", "last", "number", NULL};
- if (! PyArg_ParseTupleAndKeywords(args, kwds, "|SSi", kwlist,
- &first, &last,
+ if (! PyArg_ParseTupleAndKeywords(args, kwds, "|SSi", kwlist,
+ &first, &last,
&self->number))
- return -1;
+ return -1;
if (first) {
tmp = self->first;
@@ -88,22 +86,22 @@ Noddy_getfirst(Noddy *self, void *closure)
static int
Noddy_setfirst(Noddy *self, PyObject *value, void *closure)
{
- if (value == NULL) {
- PyErr_SetString(PyExc_TypeError, "Cannot delete the first attribute");
- return -1;
- }
-
- if (! PyUnicode_Check(value)) {
- PyErr_SetString(PyExc_TypeError,
- "The first attribute value must be a string");
- return -1;
- }
-
- Py_DECREF(self->first);
- Py_INCREF(value);
- self->first = value;
-
- return 0;
+ if (value == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Cannot delete the first attribute");
+ return -1;
+ }
+
+ if (! PyUnicode_Check(value)) {
+ PyErr_SetString(PyExc_TypeError,
+ "The first attribute value must be a string");
+ return -1;
+ }
+
+ Py_DECREF(self->first);
+ Py_INCREF(value);
+ self->first = value;
+
+ return 0;
}
static PyObject *
@@ -116,30 +114,30 @@ Noddy_getlast(Noddy *self, void *closure)
static int
Noddy_setlast(Noddy *self, PyObject *value, void *closure)
{
- if (value == NULL) {
- PyErr_SetString(PyExc_TypeError, "Cannot delete the last attribute");
- return -1;
- }
-
- if (! PyUnicode_Check(value)) {
- PyErr_SetString(PyExc_TypeError,
- "The last attribute value must be a string");
- return -1;
- }
-
- Py_DECREF(self->last);
- Py_INCREF(value);
- self->last = value;
-
- return 0;
+ if (value == NULL) {
+ PyErr_SetString(PyExc_TypeError, "Cannot delete the last attribute");
+ return -1;
+ }
+
+ if (! PyUnicode_Check(value)) {
+ PyErr_SetString(PyExc_TypeError,
+ "The last attribute value must be a string");
+ return -1;
+ }
+
+ Py_DECREF(self->last);
+ Py_INCREF(value);
+ self->last = value;
+
+ return 0;
}
static PyGetSetDef Noddy_getseters[] = {
- {"first",
+ {"first",
(getter)Noddy_getfirst, (setter)Noddy_setfirst,
"first name",
NULL},
- {"last",
+ {"last",
(getter)Noddy_getlast, (setter)Noddy_setlast,
"last name",
NULL},
@@ -149,23 +147,7 @@ static PyGetSetDef Noddy_getseters[] = {
static PyObject *
Noddy_name(Noddy* self)
{
- static PyObject *format = NULL;
- PyObject *args, *result;
-
- if (format == NULL) {
- format = PyUnicode_FromString("%s %s");
- if (format == NULL)
- return NULL;
- }
-
- args = Py_BuildValue("OO", self->first, self->last);
- if (args == NULL)
- return NULL;
-
- result = PyUnicode_Format(format, args);
- Py_DECREF(args);
-
- return result;
+ return PyUnicode_FromFormat("%S %S", self->first, self->last);
}
static PyMethodDef Noddy_methods[] = {
@@ -198,12 +180,12 @@ static PyTypeObject NoddyType = {
Py_TPFLAGS_DEFAULT |
Py_TPFLAGS_BASETYPE, /* tp_flags */
"Noddy objects", /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
Noddy_methods, /* tp_methods */
Noddy_members, /* tp_members */
Noddy_getseters, /* tp_getset */
@@ -226,7 +208,7 @@ static PyModuleDef noddy3module = {
};
PyMODINIT_FUNC
-PyInit_noddy3(void)
+PyInit_noddy3(void)
{
PyObject* m;
diff --git a/Doc/includes/noddy4.c b/Doc/includes/noddy4.c
index 6a96fac..eb9622a 100644
--- a/Doc/includes/noddy4.c
+++ b/Doc/includes/noddy4.c
@@ -27,7 +27,7 @@ Noddy_traverse(Noddy *self, visitproc visit, void *arg)
return 0;
}
-static int
+static int
Noddy_clear(Noddy *self)
{
PyObject *tmp;
@@ -58,18 +58,16 @@ Noddy_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self = (Noddy *)type->tp_alloc(type, 0);
if (self != NULL) {
self->first = PyUnicode_FromString("");
- if (self->first == NULL)
- {
+ if (self->first == NULL) {
Py_DECREF(self);
return NULL;
- }
-
+ }
+
self->last = PyUnicode_FromString("");
- if (self->last == NULL)
- {
+ if (self->last == NULL) {
Py_DECREF(self);
return NULL;
- }
+ }
self->number = 0;
}
@@ -84,10 +82,10 @@ Noddy_init(Noddy *self, PyObject *args, PyObject *kwds)
static char *kwlist[] = {"first", "last", "number", NULL};
- if (! PyArg_ParseTupleAndKeywords(args, kwds, "|OOi", kwlist,
- &first, &last,
+ if (! PyArg_ParseTupleAndKeywords(args, kwds, "|OOi", kwlist,
+ &first, &last,
&self->number))
- return -1;
+ return -1;
if (first) {
tmp = self->first;
@@ -120,15 +118,6 @@ static PyMemberDef Noddy_members[] = {
static PyObject *
Noddy_name(Noddy* self)
{
- static PyObject *format = NULL;
- PyObject *args, *result;
-
- if (format == NULL) {
- format = PyUnicode_FromString("%s %s");
- if (format == NULL)
- return NULL;
- }
-
if (self->first == NULL) {
PyErr_SetString(PyExc_AttributeError, "first");
return NULL;
@@ -139,14 +128,7 @@ Noddy_name(Noddy* self)
return NULL;
}
- args = Py_BuildValue("OO", self->first, self->last);
- if (args == NULL)
- return NULL;
-
- result = PyUnicode_Format(format, args);
- Py_DECREF(args);
-
- return result;
+ return PyUnicode_FromFormat("%S %S", self->first, self->last);
}
static PyMethodDef Noddy_methods[] = {
@@ -182,10 +164,10 @@ static PyTypeObject NoddyType = {
"Noddy objects", /* tp_doc */
(traverseproc)Noddy_traverse, /* tp_traverse */
(inquiry)Noddy_clear, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
Noddy_methods, /* tp_methods */
Noddy_members, /* tp_members */
0, /* tp_getset */
@@ -208,7 +190,7 @@ static PyModuleDef noddy4module = {
};
PyMODINIT_FUNC
-PyInit_noddy4(void)
+PyInit_noddy4(void)
{
PyObject* m;
diff --git a/Doc/library/_thread.rst b/Doc/library/_thread.rst
index 369e9cd..751c3e8 100644
--- a/Doc/library/_thread.rst
+++ b/Doc/library/_thread.rst
@@ -35,6 +35,9 @@ It defines the following constants and functions:
Raised on thread-specific errors.
+ .. versionchanged:: 3.3
+ This is now a synonym of the built-in :exc:`RuntimeError`.
+
.. data:: LockType
@@ -91,7 +94,7 @@ It defines the following constants and functions:
*size* argument specifies the stack size to be used for subsequently created
threads, and must be 0 (use platform or configured default) or a positive
integer value of at least 32,768 (32kB). If changing the thread stack size is
- unsupported, a :exc:`ThreadError` is raised. If the specified stack size is
+ unsupported, a :exc:`RuntimeError` is raised. If the specified stack size is
invalid, a :exc:`ValueError` is raised and the stack size is unmodified. 32kB
is currently the minimum supported stack size value to guarantee sufficient
stack space for the interpreter itself. Note that some platforms may have
diff --git a/Doc/library/abc.rst b/Doc/library/abc.rst
index 1048b24..dca6993 100644
--- a/Doc/library/abc.rst
+++ b/Doc/library/abc.rst
@@ -18,7 +18,7 @@ regarding a type hierarchy for numbers based on ABCs.)
The :mod:`collections` module has some concrete classes that derive from
ABCs; these can, of course, be further derived. In addition the
-:mod:`collections` module has some ABCs that can be used to test whether
+:mod:`collections.abc` submodule has some ABCs that can be used to test whether
a class or instance provides a particular interface, for example, is it
hashable or a mapping.
@@ -55,6 +55,9 @@ This module provides the following class:
assert issubclass(tuple, MyABC)
assert isinstance((), MyABC)
+ .. versionchanged:: 3.3
+ Returns the registered subclass, to allow usage as a class decorator.
+
You can also override this method in an abstract base class:
.. method:: __subclasshook__(subclass)
@@ -124,19 +127,18 @@ This module provides the following class:
available as a method of ``Foo``, so it is provided separately.
-It also provides the following decorators:
+The :mod:`abc` module also provides the following decorators:
.. decorator:: abstractmethod(function)
A decorator indicating abstract methods.
- Using this decorator requires that the class's metaclass is :class:`ABCMeta` or
- is derived from it.
- A class that has a metaclass derived from :class:`ABCMeta`
- cannot be instantiated unless all of its abstract methods and
- properties are overridden.
- The abstract methods can be called using any of the normal 'super' call
- mechanisms.
+ Using this decorator requires that the class's metaclass is :class:`ABCMeta`
+ or is derived from it. A class that has a metaclass derived from
+ :class:`ABCMeta` cannot be instantiated unless all of its abstract methods
+ and properties are overridden. The abstract methods can be called using any
+ of the normal 'super' call mechanisms. :func:`abstractmethod` may be used
+ to declare abstract methods for properties and descriptors.
Dynamically adding abstract methods to a class, or attempting to modify the
abstraction status of a method or class once it is created, are not
@@ -144,12 +146,52 @@ It also provides the following decorators:
regular inheritance; "virtual subclasses" registered with the ABC's
:meth:`register` method are not affected.
- Usage::
+ When :func:`abstractmethod` is applied in combination with other method
+ descriptors, it should be applied as the innermost decorator, as shown in
+ the following usage examples::
class C(metaclass=ABCMeta):
@abstractmethod
def my_abstract_method(self, ...):
...
+ @classmethod
+ @abstractmethod
+ def my_abstract_classmethod(cls, ...):
+ ...
+ @staticmethod
+ @abstractmethod
+ def my_abstract_staticmethod(...):
+ ...
+
+ @property
+ @abstractmethod
+ def my_abstract_property(self):
+ ...
+ @my_abstract_property.setter
+ @abstractmethod
+ def my_abstract_property(self, val):
+ ...
+
+ @abstractmethod
+ def _get_x(self):
+ ...
+ @abstractmethod
+ def _set_x(self, val):
+ ...
+ x = property(_get_x, _set_x)
+
+ In order to correctly interoperate with the abstract base class machinery,
+ the descriptor must identify itself as abstract using
+ :attr:`__isabstractmethod__`. In general, this attribute should be ``True``
+ if any of the methods used to compose the descriptor are abstract. For
+ example, Python's built-in property does the equivalent of::
+
+ class Descriptor:
+ ...
+ @property
+ def __isabstractmethod__(self):
+ return any(getattr(f, '__isabstractmethod__', False) for
+ f in (self._fget, self._fset, self._fdel))
.. note::
@@ -174,6 +216,8 @@ It also provides the following decorators:
...
.. versionadded:: 3.2
+ .. deprecated:: 3.3
+ Use :class:`classmethod` with :func:`abstractmethod` instead.
.. decorator:: abstractstaticmethod(function)
@@ -189,18 +233,19 @@ It also provides the following decorators:
...
.. versionadded:: 3.2
+ .. deprecated:: 3.3
+ Use :class:`staticmethod` with :func:`abstractmethod` instead.
-.. function:: abstractproperty(fget=None, fset=None, fdel=None, doc=None)
+.. decorator:: abstractproperty(fget=None, fset=None, fdel=None, doc=None)
A subclass of the built-in :func:`property`, indicating an abstract property.
- Using this function requires that the class's metaclass is :class:`ABCMeta` or
- is derived from it.
- A class that has a metaclass derived from :class:`ABCMeta` cannot be
- instantiated unless all of its abstract methods and properties are overridden.
- The abstract properties can be called using any of the normal
- 'super' call mechanisms.
+ Using this function requires that the class's metaclass is :class:`ABCMeta`
+ or is derived from it. A class that has a metaclass derived from
+ :class:`ABCMeta` cannot be instantiated unless all of its abstract methods
+ and properties are overridden. The abstract properties can be called using
+ any of the normal 'super' call mechanisms.
Usage::
@@ -217,6 +262,9 @@ It also provides the following decorators:
def setx(self, value): ...
x = abstractproperty(getx, setx)
+ .. deprecated:: 3.3
+ Use :class:`property` with :func:`abstractmethod` instead.
+
.. rubric:: Footnotes
diff --git a/Doc/library/archiving.rst b/Doc/library/archiving.rst
index 75d137c..c928494 100644
--- a/Doc/library/archiving.rst
+++ b/Doc/library/archiving.rst
@@ -5,8 +5,9 @@ Data Compression and Archiving
******************************
The modules described in this chapter support data compression with the zlib,
-gzip, and bzip2 algorithms, and the creation of ZIP- and tar-format archives.
-See also :ref:`archiving-operations` provided by the :mod:`shutil` module.
+gzip, bzip2 and lzma algorithms, and the creation of ZIP- and tar-format
+archives. See also :ref:`archiving-operations` provided by the :mod:`shutil`
+module.
.. toctree::
@@ -14,5 +15,6 @@ See also :ref:`archiving-operations` provided by the :mod:`shutil` module.
zlib.rst
gzip.rst
bz2.rst
+ lzma.rst
zipfile.rst
tarfile.rst
diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst
index 5273e9b..9f6a1ea 100644
--- a/Doc/library/argparse.rst
+++ b/Doc/library/argparse.rst
@@ -371,16 +371,16 @@ formatter_class
^^^^^^^^^^^^^^^
:class:`ArgumentParser` objects allow the help formatting to be customized by
-specifying an alternate formatting class. Currently, there are three such
+specifying an alternate formatting class. Currently, there are four such
classes:
.. class:: RawDescriptionHelpFormatter
RawTextHelpFormatter
ArgumentDefaultsHelpFormatter
+ MetavarTypeHelpFormatter
-The first two allow more control over how textual descriptions are displayed,
-while the last automatically adds information about argument default values.
-
+:class:`RawDescriptionHelpFormatter` and :class:`RawTextHelpFormatter` give
+more control over how textual descriptions are displayed.
By default, :class:`ArgumentParser` objects line-wrap the description_ and
epilog_ texts in command-line help messages::
@@ -433,8 +433,8 @@ should not be line-wrapped::
:class:`RawTextHelpFormatter` maintains whitespace for all sorts of help text,
including argument descriptions.
-The other formatter class available, :class:`ArgumentDefaultsHelpFormatter`,
-will add information about the default value of each of the arguments::
+:class:`ArgumentDefaultsHelpFormatter` automatically adds information about
+default values to each of the argument help messages::
>>> parser = argparse.ArgumentParser(
... prog='PROG',
@@ -451,6 +451,25 @@ will add information about the default value of each of the arguments::
-h, --help show this help message and exit
--foo FOO FOO! (default: 42)
+:class:`MetavarTypeHelpFormatter` uses the name of the type_ argument for each
+argument as the display name for its values (rather than using the dest_
+as the regular formatter does)::
+
+ >>> parser = argparse.ArgumentParser(
+ ... prog='PROG',
+ ... formatter_class=argparse.MetavarTypeHelpFormatter)
+ >>> parser.add_argument('--foo', type=int)
+ >>> parser.add_argument('bar', type=float)
+ >>> parser.print_help()
+ usage: PROG [-h] [--foo int] float
+
+ positional arguments:
+ float
+
+ optional arguments:
+ -h, --help show this help message and exit
+ --foo int
+
conflict_handler
^^^^^^^^^^^^^^^^
diff --git a/Doc/library/array.rst b/Doc/library/array.rst
index d563cce..8f6943a 100644
--- a/Doc/library/array.rst
+++ b/Doc/library/array.rst
@@ -14,36 +14,54 @@ them is constrained. The type is specified at object creation time by using a
:dfn:`type code`, which is a single character. The following type codes are
defined:
-+-----------+----------------+-------------------+-----------------------+
-| Type code | C Type | Python Type | Minimum size in bytes |
-+===========+================+===================+=======================+
-| ``'b'`` | signed char | int | 1 |
-+-----------+----------------+-------------------+-----------------------+
-| ``'B'`` | unsigned char | int | 1 |
-+-----------+----------------+-------------------+-----------------------+
-| ``'u'`` | Py_UNICODE | Unicode character | 2 (see note) |
-+-----------+----------------+-------------------+-----------------------+
-| ``'h'`` | signed short | int | 2 |
-+-----------+----------------+-------------------+-----------------------+
-| ``'H'`` | unsigned short | int | 2 |
-+-----------+----------------+-------------------+-----------------------+
-| ``'i'`` | signed int | int | 2 |
-+-----------+----------------+-------------------+-----------------------+
-| ``'I'`` | unsigned int | int | 2 |
-+-----------+----------------+-------------------+-----------------------+
-| ``'l'`` | signed long | int | 4 |
-+-----------+----------------+-------------------+-----------------------+
-| ``'L'`` | unsigned long | int | 4 |
-+-----------+----------------+-------------------+-----------------------+
-| ``'f'`` | float | float | 4 |
-+-----------+----------------+-------------------+-----------------------+
-| ``'d'`` | double | float | 8 |
-+-----------+----------------+-------------------+-----------------------+
-
-.. note::
-
- The ``'u'`` typecode corresponds to Python's unicode character. On narrow
- Unicode builds this is 2-bytes, on wide builds this is 4-bytes.
++-----------+--------------------+-------------------+-----------------------+-------+
+| Type code | C Type | Python Type | Minimum size in bytes | Notes |
++===========+====================+===================+=======================+=======+
+| ``'b'`` | signed char | int | 1 | |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'B'`` | unsigned char | int | 1 | |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'u'`` | Py_UNICODE | Unicode character | 2 | \(1) |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'h'`` | signed short | int | 2 | |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'H'`` | unsigned short | int | 2 | |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'i'`` | signed int | int | 2 | |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'I'`` | unsigned int | int | 2 | |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'l'`` | signed long | int | 4 | |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'L'`` | unsigned long | int | 4 | |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'q'`` | signed long long | int | 8 | \(2) |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'Q'`` | unsigned long long | int | 8 | \(2) |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'f'`` | float | float | 4 | |
++-----------+--------------------+-------------------+-----------------------+-------+
+| ``'d'`` | double | float | 8 | |
++-----------+--------------------+-------------------+-----------------------+-------+
+
+Notes:
+
+(1)
+ The ``'u'`` type code corresponds to Python's obsolete unicode character
+ (:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). Depending on the
+ platform, it can be 16 bits or 32 bits.
+
+ ``'u'`` will be removed together with the rest of the :c:type:`Py_UNICODE`
+ API.
+
+ .. deprecated-removed:: 3.3 4.0
+
+(2)
+ The ``'q'`` and ``'Q'`` type codes are available only if
+ the platform C compiler used to build Python supports C :c:type:`long long`,
+ or, on Windows, :c:type:`__int64`.
+
+ .. versionadded:: 3.3
The actual representation of values is determined by the machine architecture
(strictly speaking, by the C implementation). The actual size can be accessed
diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst
index e2c0b6d..16de3ca 100644
--- a/Doc/library/ast.rst
+++ b/Doc/library/ast.rst
@@ -96,9 +96,6 @@ Node classes
Abstract Grammar
----------------
-The module defines a string constant ``__version__`` which is the decimal
-Subversion revision number of the file shown below.
-
The abstract grammar is currently defined as follows:
.. literalinclude:: ../../Parser/Python.asdl
diff --git a/Doc/library/asynchat.rst b/Doc/library/asynchat.rst
index 75b3cda..55c61d7 100644
--- a/Doc/library/asynchat.rst
+++ b/Doc/library/asynchat.rst
@@ -197,6 +197,9 @@ The :meth:`handle_request` method is called once all relevant input has been
marshalled, after setting the channel terminator to ``None`` to ensure that
any extraneous data sent by the web client are ignored. ::
+
+ import asynchat
+
class http_request_handler(asynchat.async_chat):
def __init__(self, sock, addr, sessions, log):
diff --git a/Doc/library/asyncore.rst b/Doc/library/asyncore.rst
index 619b7bb..7cacca1 100644
--- a/Doc/library/asyncore.rst
+++ b/Doc/library/asyncore.rst
@@ -184,12 +184,15 @@ any that have been added to the map during asynchronous service) is closed.
Most of these are nearly identical to their socket partners.
- .. method:: create_socket(family, type)
+ .. method:: create_socket(family=socket.AF_INET, type=socket.SOCK_STREAM)
This is identical to the creation of a normal socket, and will use the
same options for creation. Refer to the :mod:`socket` documentation for
information on creating sockets.
+ .. versionchanged:: 3.3
+ *family* and *type* arguments can be omitted.
+
.. method:: connect(address)
@@ -274,13 +277,13 @@ asyncore Example basic HTTP client
Here is a very basic HTTP client that uses the :class:`dispatcher` class to
implement its socket handling::
- import asyncore, socket
+ import asyncore
class HTTPClient(asyncore.dispatcher):
def __init__(self, host, path):
asyncore.dispatcher.__init__(self)
- self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
+ self.create_socket()
self.connect( (host, 80) )
self.buffer = bytes('GET %s HTTP/1.0\r\nHost: %s\r\n\r\n' %
(path, host), 'ascii')
@@ -314,7 +317,6 @@ Here is a basic echo server that uses the :class:`dispatcher` class to accept
connections and dispatches the incoming connections to a handler::
import asyncore
- import socket
class EchoHandler(asyncore.dispatcher_with_send):
@@ -327,7 +329,7 @@ connections and dispatches the incoming connections to a handler::
def __init__(self, host, port):
asyncore.dispatcher.__init__(self)
- self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
+ self.create_socket()
self.set_reuse_addr()
self.bind((host, port))
self.listen(5)
@@ -338,4 +340,3 @@ connections and dispatches the incoming connections to a handler::
server = EchoServer('localhost', 8080)
asyncore.loop()
-
diff --git a/Doc/library/atexit.rst b/Doc/library/atexit.rst
index 7c76bab..2e22cab 100644
--- a/Doc/library/atexit.rst
+++ b/Doc/library/atexit.rst
@@ -9,13 +9,14 @@
The :mod:`atexit` module defines functions to register and unregister cleanup
functions. Functions thus registered are automatically executed upon normal
-interpreter termination. The order in which the functions are called is not
-defined; if you have cleanup operations that depend on each other, you should
-wrap them in a function and register that one. This keeps :mod:`atexit` simple.
+interpreter termination. :mod:`atexit` runs these functions in the *reverse*
+of the order in which they were registered; if you register ``A``, ``B``, and
+``C``, at interpreter termination time they will be run in the order ``C``,
+``B``, ``A``.
-Note: the functions registered via this module are not called when the program
-is killed by a signal not handled by Python, when a Python fatal internal error
-is detected, or when :func:`os._exit` is called.
+**Note:** The functions registered via this module are not called when the
+program is killed by a signal not handled by Python, when a Python fatal
+internal error is detected, or when :func:`os._exit` is called.
.. function:: register(func, *args, **kargs)
@@ -67,8 +68,9 @@ automatically when the program terminates without relying on the application
making an explicit call into this module at termination. ::
try:
- _count = int(open("/tmp/counter").read())
- except IOError:
+ with open("/tmp/counter") as infile:
+ _count = int(infile.read())
+ except FileNotFoundError:
_count = 0
def incrcounter(n):
@@ -76,7 +78,8 @@ making an explicit call into this module at termination. ::
_count = _count + n
def savecounter():
- open("/tmp/counter", "w").write("%d" % _count)
+ with open("/tmp/counter", "w") as outfile:
+ outfile.write("%d" % _count)
import atexit
atexit.register(savecounter)
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
index c08df15..ade0f07 100644
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -18,9 +18,14 @@ POST request. The encoding algorithm is not the same as the
There are two interfaces provided by this module. The modern interface
supports encoding and decoding ASCII byte string objects using all three
-alphabets. The legacy interface provides for encoding and decoding to and from
-file-like objects as well as byte strings, but only using the Base64 standard
-alphabet.
+alphabets. Additionally, the decoding functions of the modern interface also
+accept Unicode strings containing only ASCII characters. The legacy interface
+provides for encoding and decoding to and from file-like objects as well as
+byte strings, but only using the Base64 standard alphabet.
+
+.. versionchanged:: 3.3
+ ASCII-only Unicode strings are now accepted by the decoding functions of
+ the modern interface.
The modern interface provides:
diff --git a/Doc/library/binary.rst b/Doc/library/binary.rst
new file mode 100644
index 0000000..51fbdc1
--- /dev/null
+++ b/Doc/library/binary.rst
@@ -0,0 +1,23 @@
+.. _binaryservices:
+
+********************
+Binary Data Services
+********************
+
+The modules described in this chapter provide some basic service operations
+for manipulation of binary data. Other operations on binary data, specifically
+in relation to file formats and network protocols, are described in the
+relevant sections.
+
+Some libraries described under :ref:`textservices` also work with either
+ASCII-compatible binary formats (for example, :mod:`re`) or all binary data
+(for example, :mod:`difflib`).
+
+In addition, see the documentation for Python's built-in binary data types in
+:ref:`binaryseq`.
+
+.. toctree::
+
+ struct.rst
+ codecs.rst
+
diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst
index 2aa3702..baf430d 100644
--- a/Doc/library/binascii.rst
+++ b/Doc/library/binascii.rst
@@ -20,8 +20,13 @@ higher-level modules.
.. note::
- Encoding and decoding functions do not accept Unicode strings. Only bytestring
- and bytearray objects can be processed.
+ ``a2b_*`` functions accept Unicode strings containing only ASCII characters.
+ Other functions only accept bytes and bytes-compatible objects (such as
+ bytearray objects and other objects implementing the buffer API).
+
+ .. versionchanged:: 3.3
+ ASCII-only Unicode strings are now accepted by the ``a2b_*`` functions.
+
The :mod:`binascii` module defines the following functions:
diff --git a/Doc/library/bz2.rst b/Doc/library/bz2.rst
index 93144b6..d06a39a 100644
--- a/Doc/library/bz2.rst
+++ b/Doc/library/bz2.rst
@@ -1,201 +1,202 @@
-:mod:`bz2` --- Compression compatible with :program:`bzip2`
-===========================================================
+:mod:`bz2` --- Support for :program:`bzip2` compression
+=======================================================
.. module:: bz2
- :synopsis: Interface to compression and decompression routines
- compatible with bzip2.
+ :synopsis: Interfaces for bzip2 compression and decompression.
.. moduleauthor:: Gustavo Niemeyer <niemeyer@conectiva.com>
+.. moduleauthor:: Nadeem Vawda <nadeem.vawda@gmail.com>
.. sectionauthor:: Gustavo Niemeyer <niemeyer@conectiva.com>
+.. sectionauthor:: Nadeem Vawda <nadeem.vawda@gmail.com>
-This module provides a comprehensive interface for the bz2 compression library.
-It implements a complete file interface, one-shot (de)compression functions, and
-types for sequential (de)compression.
+This module provides a comprehensive interface for compressing and
+decompressing data using the bzip2 compression algorithm.
-Here is a summary of the features offered by the bz2 module:
+The :mod:`bz2` module contains:
-* :class:`BZ2File` class implements a complete file interface, including
- :meth:`~BZ2File.readline`, :meth:`~BZ2File.readlines`,
- :meth:`~BZ2File.writelines`, :meth:`~BZ2File.seek`, etc;
+* The :func:`.open` function and :class:`BZ2File` class for reading and
+ writing compressed files.
+* The :class:`BZ2Compressor` and :class:`BZ2Decompressor` classes for
+ incremental (de)compression.
+* The :func:`compress` and :func:`decompress` functions for one-shot
+ (de)compression.
-* :class:`BZ2File` class implements emulated :meth:`~BZ2File.seek` support;
-
-* :class:`BZ2File` class implements universal newline support;
-
-* :class:`BZ2File` class offers an optimized line iteration using a readahead
- algorithm;
-
-* Sequential (de)compression supported by :class:`BZ2Compressor` and
- :class:`BZ2Decompressor` classes;
-
-* One-shot (de)compression supported by :func:`compress` and :func:`decompress`
- functions;
-
-* Thread safety uses individual locking mechanism.
+All of the classes in this module may safely be accessed from multiple threads.
(De)compression of files
------------------------
-Handling of compressed files is offered by the :class:`BZ2File` class.
+.. function:: open(filename, mode='r', compresslevel=9, encoding=None, errors=None, newline=None)
+ Open a bzip2-compressed file in binary or text mode, returning a :term:`file
+ object`.
-.. index::
- single: universal newlines; bz2.BZ2File class
+ As with the constructor for :class:`BZ2File`, the *filename* argument can be
+ an actual filename (a :class:`str` or :class:`bytes` object), or an existing
+ file object to read from or write to.
-.. class:: BZ2File(filename, mode='r', buffering=0, compresslevel=9)
+ The *mode* argument can be any of ``'r'``, ``'rb'``, ``'w'``, ``'wb'``,
+ ``'a'``, or ``'ab'`` for binary mode, or ``'rt'``, ``'wt'``, or ``'at'`` for
+ text mode. The default is ``'rb'``.
- Open a bz2 file. Mode can be either ``'r'`` or ``'w'``, for reading (default)
- or writing. When opened for writing, the file will be created if it doesn't
- exist, and truncated otherwise. If *buffering* is given, ``0`` means
- unbuffered, and larger numbers specify the buffer size; the default is
- ``0``. If *compresslevel* is given, it must be a number between ``1`` and
- ``9``; the default is ``9``. Add a ``'U'`` to mode to open the file for input
- in :term:`universal newlines` mode. Any line ending in the input file will be
- seen as a ``'\n'`` in Python. Also, a file so opened gains the attribute
- :attr:`newlines`; the value for this attribute is one of ``None`` (no newline
- read yet), ``'\r'``, ``'\n'``, ``'\r\n'`` or a tuple containing all the
- newline types seen. Universal newlines are available only when
- reading. Instances support iteration in the same way as normal :class:`file`
- instances.
+ The *compresslevel* argument is an integer from 1 to 9, as for the
+ :class:`BZ2File` constructor.
- :class:`BZ2File` supports the :keyword:`with` statement.
+ For binary mode, this function is equivalent to the :class:`BZ2File`
+ constructor: ``BZ2File(filename, mode, compresslevel=compresslevel)``. In
+ this case, the *encoding*, *errors* and *newline* arguments must not be
+ provided.
- .. versionchanged:: 3.1
- Support for the :keyword:`with` statement was added.
+ For text mode, a :class:`BZ2File` object is created, and wrapped in an
+ :class:`io.TextIOWrapper` instance with the specified encoding, error
+ handling behavior, and line ending(s).
+ .. versionadded:: 3.3
- .. note::
- This class does not support input files containing multiple streams (such
- as those produced by the :program:`pbzip2` tool). When reading such an
- input file, only the first stream will be accessible. If you require
- support for multi-stream files, consider using the third-party
- :mod:`bz2file` module (available from
- `PyPI <http://pypi.python.org/pypi/bz2file>`_). This module provides a
- backport of Python 3.3's :class:`BZ2File` class, which does support
- multi-stream files.
+.. class:: BZ2File(filename, mode='r', buffering=None, compresslevel=9)
+ Open a bzip2-compressed file in binary mode.
- .. method:: close()
+ If *filename* is a :class:`str` or :class:`bytes` object, open the named file
+ directly. Otherwise, *filename* should be a :term:`file object`, which will
+ be used to read or write the compressed data.
- Close the file. Sets data attribute :attr:`closed` to true. A closed file
- cannot be used for further I/O operations. :meth:`close` may be called
- more than once without error.
+ The *mode* argument can be either ``'r'`` for reading (default), ``'w'`` for
+ overwriting, or ``'a'`` for appending. These can equivalently be given as
+ ``'rb'``, ``'wb'``, and ``'ab'`` respectively.
+ If *filename* is a file object (rather than an actual file name), a mode of
+ ``'w'`` does not truncate the file, and is instead equivalent to ``'a'``.
- .. method:: read([size])
+ The *buffering* argument is ignored. Its use is deprecated.
- Read at most *size* uncompressed bytes, returned as a byte string. If the
- *size* argument is negative or omitted, read until EOF is reached.
+ If *mode* is ``'w'`` or ``'a'``, *compresslevel* can be a number between
+ ``1`` and ``9`` specifying the level of compression: ``1`` produces the
+ least compression, and ``9`` (default) produces the most compression.
+ If *mode* is ``'r'``, the input file may be the concatenation of multiple
+ compressed streams.
- .. method:: readline([size])
+ :class:`BZ2File` provides all of the members specified by
+ :class:`io.BufferedIOBase`, except for :meth:`detach` and :meth:`truncate`.
+ Iteration and the :keyword:`with` statement are supported.
- Return the next line from the file, as a byte string, retaining newline.
- A non-negative *size* argument limits the maximum number of bytes to
- return (an incomplete line may be returned then). Return an empty byte
- string at EOF.
+ :class:`BZ2File` also provides the following method:
+ .. method:: peek([n])
- .. method:: readlines([size])
+ Return buffered data without advancing the file position. At least one
+ byte of data will be returned (unless at EOF). The exact number of bytes
+ returned is unspecified.
- Return a list of lines read. The optional *size* argument, if given, is an
- approximate bound on the total number of bytes in the lines returned.
+ .. versionadded:: 3.3
+ .. versionchanged:: 3.1
+ Support for the :keyword:`with` statement was added.
- .. method:: seek(offset[, whence])
+ .. versionchanged:: 3.3
+ The :meth:`fileno`, :meth:`readable`, :meth:`seekable`, :meth:`writable`,
+ :meth:`read1` and :meth:`readinto` methods were added.
- Move to new file position. Argument *offset* is a byte count. Optional
- argument *whence* defaults to ``os.SEEK_SET`` or ``0`` (offset from start
- of file; offset should be ``>= 0``); other values are ``os.SEEK_CUR`` or
- ``1`` (move relative to current position; offset can be positive or
- negative), and ``os.SEEK_END`` or ``2`` (move relative to end of file;
- offset is usually negative, although many platforms allow seeking beyond
- the end of a file).
+ .. versionchanged:: 3.3
+ Support was added for *filename* being a :term:`file object` instead of an
+ actual filename.
- Note that seeking of bz2 files is emulated, and depending on the
- parameters the operation may be extremely slow.
+ .. versionchanged:: 3.3
+ The ``'a'`` (append) mode was added, along with support for reading
+ multi-stream files.
- .. method:: tell()
+Incremental (de)compression
+---------------------------
- Return the current file position, an integer.
+.. class:: BZ2Compressor(compresslevel=9)
+ Create a new compressor object. This object may be used to compress data
+ incrementally. For one-shot compression, use the :func:`compress` function
+ instead.
- .. method:: write(data)
+ *compresslevel*, if given, must be a number between ``1`` and ``9``. The
+ default is ``9``.
- Write the byte string *data* to file. Note that due to buffering,
- :meth:`close` may be needed before the file on disk reflects the data
- written.
+ .. method:: compress(data)
+ Provide data to the compressor object. Returns a chunk of compressed data
+ if possible, or an empty byte string otherwise.
- .. method:: writelines(sequence_of_byte_strings)
+ When you have finished providing data to the compressor, call the
+ :meth:`flush` method to finish the compression process.
- Write the sequence of byte strings to the file. Note that newlines are not
- added. The sequence can be any iterable object producing byte strings.
- This is equivalent to calling write() for each byte string.
+ .. method:: flush()
-Sequential (de)compression
---------------------------
+ Finish the compression process. Returns the compressed data left in
+ internal buffers.
-Sequential compression and decompression is done using the classes
-:class:`BZ2Compressor` and :class:`BZ2Decompressor`.
+ The compressor object may not be used after this method has been called.
-.. class:: BZ2Compressor(compresslevel=9)
+.. class:: BZ2Decompressor()
- Create a new compressor object. This object may be used to compress data
- sequentially. If you want to compress data in one shot, use the
- :func:`compress` function instead. The *compresslevel* parameter, if given,
- must be a number between ``1`` and ``9``; the default is ``9``.
+ Create a new decompressor object. This object may be used to decompress data
+ incrementally. For one-shot decompression, use the :func:`decompress` function
+ instead.
- .. method:: compress(data)
+ .. note::
+ This class does not transparently handle inputs containing multiple
+ compressed streams, unlike :func:`decompress` and :class:`BZ2File`. If
+ you need to decompress a multi-stream input with :class:`BZ2Decompressor`,
+ you must use a new decompressor for each stream.
- Provide more data to the compressor object. It will return chunks of
- compressed data whenever possible. When you've finished providing data to
- compress, call the :meth:`flush` method to finish the compression process,
- and return what is left in internal buffers.
+ .. method:: decompress(data)
+ Provide data to the decompressor object. Returns a chunk of decompressed
+ data if possible, or an empty byte string otherwise.
- .. method:: flush()
+ Attempting to decompress data after the end of the current stream is
+ reached raises an :exc:`EOFError`. If any data is found after the end of
+ the stream, it is ignored and saved in the :attr:`unused_data` attribute.
- Finish the compression process and return what is left in internal
- buffers. You must not use the compressor object after calling this method.
+ .. attribute:: eof
-.. class:: BZ2Decompressor()
+ True if the end-of-stream marker has been reached.
- Create a new decompressor object. This object may be used to decompress data
- sequentially. If you want to decompress data in one shot, use the
- :func:`decompress` function instead.
+ .. versionadded:: 3.3
- .. method:: decompress(data)
- Provide more data to the decompressor object. It will return chunks of
- decompressed data whenever possible. If you try to decompress data after
- the end of stream is found, :exc:`EOFError` will be raised. If any data
- was found after the end of stream, it'll be ignored and saved in
- :attr:`unused_data` attribute.
+ .. attribute:: unused_data
+
+ Data found after the end of the compressed stream.
+
+ If this attribute is accessed before the end of the stream has been
+ reached, its value will be ``b''``.
One-shot (de)compression
------------------------
-One-shot compression and decompression is provided through the :func:`compress`
-and :func:`decompress` functions.
+.. function:: compress(data, compresslevel=9)
+ Compress *data*.
-.. function:: compress(data, compresslevel=9)
+ *compresslevel*, if given, must be a number between ``1`` and ``9``. The
+ default is ``9``.
- Compress *data* in one shot. If you want to compress data sequentially, use
- an instance of :class:`BZ2Compressor` instead. The *compresslevel* parameter,
- if given, must be a number between ``1`` and ``9``; the default is ``9``.
+ For incremental compression, use a :class:`BZ2Compressor` instead.
.. function:: decompress(data)
- Decompress *data* in one shot. If you want to decompress data sequentially,
- use an instance of :class:`BZ2Decompressor` instead.
+ Decompress *data*.
+
+ If *data* is the concatenation of multiple compressed streams, decompress
+ all of the streams.
+
+ For incremental decompression, use a :class:`BZ2Decompressor` instead.
+
+ .. versionchanged:: 3.3
+ Support for multi-stream inputs was added.
diff --git a/Doc/library/chunk.rst b/Doc/library/chunk.rst
index d3558a4..c1ba497 100644
--- a/Doc/library/chunk.rst
+++ b/Doc/library/chunk.rst
@@ -84,8 +84,9 @@ instance will fail with a :exc:`EOFError` exception.
Close and skip to the end of the chunk. This does not close the
underlying file.
- The remaining methods will raise :exc:`IOError` if called after the
- :meth:`close` method has been called.
+ The remaining methods will raise :exc:`OSError` if called after the
+ :meth:`close` method has been called. Before Python 3.3, they used to
+ raise :exc:`IOError`, now an alias of :exc:`OSError`.
.. method:: isatty()
diff --git a/Doc/library/cmd.rst b/Doc/library/cmd.rst
index 943c04a..9722928 100644
--- a/Doc/library/cmd.rst
+++ b/Doc/library/cmd.rst
@@ -285,8 +285,8 @@ immediate playback::
def do_playback(self, arg):
'Playback commands from a file: PLAYBACK rose.cmd'
self.close()
- cmds = open(arg).read().splitlines()
- self.cmdqueue.extend(cmds)
+ with open(arg) as f:
+ self.cmdqueue.extend(f.read().splitlines())
def precmd(self, line):
line = line.lower()
if self.file and 'playback' not in line:
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
index 762bb98..071fc23 100644
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -458,7 +458,8 @@ define in order to be compatible with the Python codec registry.
.. method:: reset()
- Reset the encoder to the initial state.
+ Reset the encoder to the initial state. The output is discarded: call
+ ``.encode('', final=True)`` to reset the encoder and to get the output.
.. method:: IncrementalEncoder.getstate()
@@ -786,11 +787,9 @@ methods and attributes from the underlying stream.
Encodings and Unicode
---------------------
-Strings are stored internally as sequences of codepoints (to be precise
-as :c:type:`Py_UNICODE` arrays). Depending on the way Python is compiled (either
-via ``--without-wide-unicode`` or ``--with-wide-unicode``, with the
-former being the default) :c:type:`Py_UNICODE` is either a 16-bit or 32-bit data
-type. Once a string object is used outside of CPU and memory, CPU endianness
+Strings are stored internally as sequences of codepoints in range ``0x0``--``0x10FFFF``
+(see :pep:`393` for more details about the implementation).
+Once a string object is used outside of CPU and memory, CPU endianness
and how these arrays are stored as bytes become an issue. Transforming a
string object into a sequence of bytes is called encoding and recreating the
string object from the sequence of bytes is known as decoding. There are many
@@ -901,6 +900,15 @@ is meant to be exhaustive. Notice that spelling alternatives that only differ in
case or use a hyphen instead of an underscore are also valid aliases; therefore,
e.g. ``'utf-8'`` is a valid alias for the ``'utf_8'`` codec.
+.. impl-detail::
+
+ Some common encodings can bypass the codecs lookup machinery to
+ improve performance. These optimization opportunities are only
+ recognized by CPython for a limited set of aliases: utf-8, utf8,
+ latin-1, latin1, iso-8859-1, mbcs (Windows only), ascii, utf-16,
+ and utf-32. Using alternative spellings for these encodings may
+ result in slower execution.
+
Many of the character sets support the same languages. They vary in individual
characters (e.g. whether the EURO SIGN is supported or not), and in the
assignment of characters to code positions. For the European languages in
@@ -1003,6 +1011,11 @@ particular, the following variants typically exist:
+-----------------+--------------------------------+--------------------------------+
| cp1258 | windows-1258 | Vietnamese |
+-----------------+--------------------------------+--------------------------------+
+| cp65001 | | Windows only: Windows UTF-8 |
+| | | (``CP_UTF8``) |
+| | | |
+| | | .. versionadded:: 3.3 |
++-----------------+--------------------------------+--------------------------------+
| euc_jp | eucjp, ujis, u-jis | Japanese |
+-----------------+--------------------------------+--------------------------------+
| euc_jis_2004 | jisx0213, eucjis2004 | Japanese |
@@ -1160,6 +1173,8 @@ particular, the following variants typically exist:
| unicode_internal | | Return the internal |
| | | representation of the |
| | | operand |
+| | | |
+| | | .. deprecated:: 3.3 |
+--------------------+---------+---------------------------+
The following codecs provide bytes-to-bytes mappings.
@@ -1272,12 +1287,13 @@ functions can be used directly if desired.
.. module:: encodings.mbcs
:synopsis: Windows ANSI codepage
-Encode operand according to the ANSI codepage (CP_ACP). This codec only
-supports ``'strict'`` and ``'replace'`` error handlers to encode, and
-``'strict'`` and ``'ignore'`` error handlers to decode.
+Encode operand according to the ANSI codepage (CP_ACP).
Availability: Windows only.
+.. versionchanged:: 3.3
+ Support any error handler.
+
.. versionchanged:: 3.2
Before 3.2, the *errors* argument was ignored; ``'replace'`` was always used
to encode, and ``'ignore'`` to decode.
diff --git a/Doc/library/collections.abc.rst b/Doc/library/collections.abc.rst
new file mode 100644
index 0000000..9873489
--- /dev/null
+++ b/Doc/library/collections.abc.rst
@@ -0,0 +1,182 @@
+:mod:`collections.abc` --- Abstract Base Classes for Containers
+===============================================================
+
+.. module:: collections.abc
+ :synopsis: Abstract base classes for containers
+.. moduleauthor:: Raymond Hettinger <python at rcn.com>
+.. sectionauthor:: Raymond Hettinger <python at rcn.com>
+
+.. versionadded:: 3.3
+ Formerly, this module was part of the :mod:`collections` module.
+
+.. testsetup:: *
+
+ from collections import *
+ import itertools
+ __name__ = '<doctest>'
+
+**Source code:** :source:`Lib/collections/abc.py`
+
+--------------
+
+This module provides :term:`abstract base classes <abstract base class>` that
+can be used to test whether a class provides a particular interface; for
+example, whether it is hashable or whether it is a mapping.
+
+
+.. _collections-abstract-base-classes:
+
+Collections Abstract Base Classes
+---------------------------------
+
+The collections module offers the following :term:`ABCs <abstract base class>`:
+
+========================= ===================== ====================== ====================================================
+ABC Inherits from Abstract Methods Mixin Methods
+========================= ===================== ====================== ====================================================
+:class:`Container` ``__contains__``
+:class:`Hashable` ``__hash__``
+:class:`Iterable` ``__iter__``
+:class:`Iterator` :class:`Iterable` ``__next__`` ``__iter__``
+:class:`Sized` ``__len__``
+:class:`Callable` ``__call__``
+
+:class:`Sequence` :class:`Sized`, ``__getitem__`` ``__contains__``, ``__iter__``, ``__reversed__``,
+ :class:`Iterable`, ``index``, and ``count``
+ :class:`Container`
+
+:class:`MutableSequence` :class:`Sequence` ``__setitem__``, Inherited :class:`Sequence` methods and
+ ``__delitem__``, ``append``, ``reverse``, ``extend``, ``pop``,
+ ``insert`` ``remove``, ``clear``, and ``__iadd__``
+
+:class:`Set` :class:`Sized`, ``__le__``, ``__lt__``, ``__eq__``, ``__ne__``,
+ :class:`Iterable`, ``__gt__``, ``__ge__``, ``__and__``, ``__or__``,
+ :class:`Container` ``__sub__``, ``__xor__``, and ``isdisjoint``
+
+:class:`MutableSet` :class:`Set` ``add``, Inherited :class:`Set` methods and
+ ``discard`` ``clear``, ``pop``, ``remove``, ``__ior__``,
+ ``__iand__``, ``__ixor__``, and ``__isub__``
+
+:class:`Mapping` :class:`Sized`, ``__getitem__`` ``__contains__``, ``keys``, ``items``, ``values``,
+ :class:`Iterable`, ``get``, ``__eq__``, and ``__ne__``
+ :class:`Container`
+
+:class:`MutableMapping` :class:`Mapping` ``__setitem__``, Inherited :class:`Mapping` methods and
+ ``__delitem__`` ``pop``, ``popitem``, ``clear``, ``update``,
+ and ``setdefault``
+
+
+:class:`MappingView` :class:`Sized` ``__len__``
+:class:`ItemsView` :class:`MappingView`, ``__contains__``,
+ :class:`Set` ``__iter__``
+:class:`KeysView` :class:`MappingView`, ``__contains__``,
+ :class:`Set` ``__iter__``
+:class:`ValuesView` :class:`MappingView` ``__contains__``, ``__iter__``
+========================= ===================== ====================== ====================================================
+
+
+.. class:: Container
+ Hashable
+ Sized
+ Callable
+
+ ABCs for classes that provide respectively the methods :meth:`__contains__`,
+ :meth:`__hash__`, :meth:`__len__`, and :meth:`__call__`.
+
+.. class:: Iterable
+
+ ABC for classes that provide the :meth:`__iter__` method.
+ See also the definition of :term:`iterable`.
+
+.. class:: Iterator
+
+ ABC for classes that provide the :meth:`__iter__` and :meth:`__next__` methods.
+ See also the definition of :term:`iterator`.
+
+.. class:: Sequence
+ MutableSequence
+
+ ABCs for read-only and mutable :term:`sequences <sequence>`.
+
+.. class:: Set
+ MutableSet
+
+ ABCs for read-only and mutable sets.
+
+.. class:: Mapping
+ MutableMapping
+
+ ABCs for read-only and mutable :term:`mappings <mapping>`.
+
+.. class:: MappingView
+ ItemsView
+ KeysView
+ ValuesView
+
+ ABCs for mapping, items, keys, and values :term:`views <view>`.
+
+
+These ABCs allow us to ask classes or instances if they provide
+particular functionality, for example::
+
+ size = None
+ if isinstance(myvar, collections.Sized):
+ size = len(myvar)
+
+Several of the ABCs are also useful as mixins that make it easier to develop
+classes supporting container APIs. For example, to write a class supporting
+the full :class:`Set` API, it is only necessary to supply the three underlying
+abstract methods: :meth:`__contains__`, :meth:`__iter__`, and :meth:`__len__`.
+The ABC supplies the remaining methods such as :meth:`__and__` and
+:meth:`isdisjoint` ::
+
+ class ListBasedSet(collections.Set):
+ ''' Alternate set implementation favoring space over speed
+ and not requiring the set elements to be hashable. '''
+ def __init__(self, iterable):
+ self.elements = lst = []
+ for value in iterable:
+ if value not in lst:
+ lst.append(value)
+ def __iter__(self):
+ return iter(self.elements)
+ def __contains__(self, value):
+ return value in self.elements
+ def __len__(self):
+ return len(self.elements)
+
+ s1 = ListBasedSet('abcdef')
+ s2 = ListBasedSet('defghi')
+ overlap = s1 & s2 # The __and__() method is supported automatically
+
+Notes on using :class:`Set` and :class:`MutableSet` as a mixin:
+
+(1)
+ Since some set operations create new sets, the default mixin methods need
+ a way to create new instances from an iterable. The class constructor is
+ assumed to have a signature in the form ``ClassName(iterable)``.
+ That assumption is factored-out to an internal classmethod called
+ :meth:`_from_iterable` which calls ``cls(iterable)`` to produce a new set.
+ If the :class:`Set` mixin is being used in a class with a different
+ constructor signature, you will need to override :meth:`_from_iterable`
+ with a classmethod that can construct new instances from
+ an iterable argument.
+
+(2)
+ To override the comparisons (presumably for speed, as the
+ semantics are fixed), redefine :meth:`__le__` and
+ then the other operations will automatically follow suit.
+
+(3)
+ The :class:`Set` mixin provides a :meth:`_hash` method to compute a hash value
+ for the set; however, :meth:`__hash__` is not defined because not all sets
+ are hashable or immutable. To add set hashability using mixins,
+ inherit from both :class:`Set` and :class:`Hashable`, then define
+ ``__hash__ = Set._hash``.
+
+.. seealso::
+
+ * `OrderedSet recipe <http://code.activestate.com/recipes/576694/>`_ for an
+ example built on :class:`MutableSet`.
+
+ * For more about ABCs, see the :mod:`abc` module and :pep:`3119`.
diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst
index 57e0481..45da4e5 100644
--- a/Doc/library/collections.rst
+++ b/Doc/library/collections.rst
@@ -2,17 +2,17 @@
==========================================
.. module:: collections
- :synopsis: Container datatypes
+ :synopsis: Container datatypes
.. moduleauthor:: Raymond Hettinger <python@rcn.com>
.. sectionauthor:: Raymond Hettinger <python@rcn.com>
.. testsetup:: *
- from collections import *
- import itertools
- __name__ = '<doctest>'
+ from collections import *
+ import itertools
+ __name__ = '<doctest>'
-**Source code:** :source:`Lib/collections.py` and :source:`Lib/_abcoll.py`
+**Source code:** :source:`Lib/collections/__init__.py`
--------------
@@ -23,6 +23,7 @@ Python's general purpose built-in containers, :class:`dict`, :class:`list`,
===================== ====================================================================
:func:`namedtuple` factory function for creating tuple subclasses with named fields
:class:`deque` list-like container with fast appends and pops on either end
+:class:`ChainMap` dict-like class for creating a single view of multiple mappings
:class:`Counter` dict subclass for counting hashable objects
:class:`OrderedDict` dict subclass that remembers the order entries were added
:class:`defaultdict` dict subclass that calls a factory function to supply missing values
@@ -31,10 +32,167 @@ Python's general purpose built-in containers, :class:`dict`, :class:`list`,
:class:`UserString` wrapper around string objects for easier string subclassing
===================== ====================================================================
-In addition to the concrete container classes, the collections module provides
-:ref:`abstract base classes <collections-abstract-base-classes>` that can be
-used to test whether a class provides a particular interface, for example,
-whether it is hashable or a mapping.
+.. versionchanged:: 3.3
+ Moved :ref:`collections-abstract-base-classes` to the :mod:`collections.abc` module.
+ For backwards compatibility, they continue to be visible in this module
+ as well.
+
+
+:class:`ChainMap` objects
+-------------------------
+
+.. versionadded:: 3.3
+
+A :class:`ChainMap` class is provided for quickly linking a number of mappings
+so they can be treated as a single unit. It is often much faster than creating
+a new dictionary and running multiple :meth:`~dict.update` calls.
+
+The class can be used to simulate nested scopes and is useful in templating.
+
+.. class:: ChainMap(*maps)
+
+ A :class:`ChainMap` groups multiple dicts or other mappings together to
+ create a single, updateable view. If no *maps* are specified, a single empty
+ dictionary is provided so that a new chain always has at least one mapping.
+
+ The underlying mappings are stored in a list. That list is public and can
+ be accessed or updated using the *maps* attribute. There is no other state.
+
+ Lookups search the underlying mappings successively until a key is found. In
+ contrast, writes, updates, and deletions only operate on the first mapping.
+
+ A :class:`ChainMap` incorporates the underlying mappings by reference. So, if
+ one of the underlying mappings gets updated, those changes will be reflected
+ in :class:`ChainMap`.
+
+ All of the usual dictionary methods are supported. In addition, there is a
+ *maps* attribute, a method for creating new subcontexts, and a property for
+ accessing all but the first mapping:
+
+ .. attribute:: maps
+
+ A user updateable list of mappings. The list is ordered from
+ first-searched to last-searched. It is the only stored state and can
+ be modified to change which mappings are searched. The list should
+ always contain at least one mapping.
+
+ .. method:: new_child()
+
+ Returns a new :class:`ChainMap` containing a new :class:`dict` followed by
+ all of the maps in the current instance. A call to ``d.new_child()`` is
+ equivalent to: ``ChainMap({}, *d.maps)``. This method is used for
+ creating subcontexts that can be updated without altering values in any
+ of the parent mappings.
+
+ .. attribute:: parents
+
+ Property returning a new :class:`ChainMap` containing all of the maps in
+ the current instance except the first one. This is useful for skipping
+ the first map in the search. Use cases are similar to those for the
+ :keyword:`nonlocal` keyword used in :term:`nested scopes <nested
+ scope>`. The use cases also parallel those for the built-in
+ :func:`super` function. A reference to ``d.parents`` is equivalent to:
+ ``ChainMap(*d.maps[1:])``.
+
+
+.. seealso::
+
+ * The `MultiContext class
+ <https://github.com/enthought/codetools/blob/4.0.0/codetools/contexts/multi_context.py>`_
+ in the Enthought `CodeTools package
+ <https://github.com/enthought/codetools>`_ has options to support
+ writing to any mapping in the chain.
+
+ * Django's `Context class
+ <http://code.djangoproject.com/browser/django/trunk/django/template/context.py>`_
+ for templating is a read-only chain of mappings. It also features
+ pushing and popping of contexts similar to the
+ :meth:`~collections.ChainMap.new_child` method and the
+ :meth:`~collections.ChainMap.parents` property.
+
+ * The `Nested Contexts recipe
+ <http://code.activestate.com/recipes/577434/>`_ has options to control
+ whether writes and other mutations apply only to the first mapping or to
+ any mapping in the chain.
+
+ * A `greatly simplified read-only version of ChainMap
+ <http://code.activestate.com/recipes/305268/>`_.
+
+
+:class:`ChainMap` Examples and Recipes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This section shows various approaches to working with chained maps.
+
+
+Example of simulating Python's internal lookup chain::
+
+ import builtins
+ pylookup = ChainMap(locals(), globals(), vars(builtins))
+
+Example of letting user specified command-line arguments take precedence over
+environment variables which in turn take precedence over default values::
+
+ import os, argparse
+
+ defaults = {'color': 'red', 'user': 'guest'}
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-u', '--user')
+ parser.add_argument('-c', '--color')
+ namespace = parser.parse_args()
+ command_line_args = {k:v for k, v in vars(namespace).items() if v}
+
+ combined = ChainMap(command_line_args, os.environ, defaults)
+ print(combined['color'])
+ print(combined['user'])
+
+Example patterns for using the :class:`ChainMap` class to simulate nested
+contexts::
+
+ c = ChainMap() # Create root context
+ d = c.new_child() # Create nested child context
+ e = c.new_child() # Child of c, independent from d
+ e.maps[0] # Current context dictionary -- like Python's locals()
+ e.maps[-1] # Root context -- like Python's globals()
+ e.parents # Enclosing context chain -- like Python's nonlocals
+
+ d['x'] # Get first key in the chain of contexts
+ d['x'] = 1 # Set value in current context
+ del d['x'] # Delete from current context
+ list(d) # All nested values
+ k in d # Check all nested values
+ len(d) # Number of nested values
+ d.items() # All nested items
+ dict(d) # Flatten into a regular dictionary
+
+The :class:`ChainMap` class only makes updates (writes and deletions) to the
+first mapping in the chain while lookups will search the full chain. However,
+if deep writes and deletions are desired, it is easy to make a subclass that
+updates keys found deeper in the chain::
+
+ class DeepChainMap(ChainMap):
+ 'Variant of ChainMap that allows direct updates to inner scopes'
+
+ def __setitem__(self, key, value):
+ for mapping in self.maps:
+ if key in mapping:
+ mapping[key] = value
+ return
+ self.maps[0][key] = value
+
+ def __delitem__(self, key):
+ for mapping in self.maps:
+ if key in mapping:
+ del mapping[key]
+ return
+ raise KeyError(key)
+
+ >>> d = DeepChainMap({'zebra': 'black'}, {'elephant' : 'blue'}, {'lion' : 'yellow'})
+ >>> d['lion'] = 'orange' # update an existing key two levels down
+ >>> d['snake'] = 'red' # new keys get added to the topmost dict
+ >>> del d['elephant'] # remove an existing key one level down
+ >>> d # display result
+ DeepChainMap({'zebra': 'black', 'snake': 'red'}, {}, {'lion': 'orange'})
:class:`Counter` objects
@@ -59,85 +217,85 @@ For example::
.. class:: Counter([iterable-or-mapping])
- A :class:`Counter` is a :class:`dict` subclass for counting hashable objects.
- It is an unordered collection where elements are stored as dictionary keys
- and their counts are stored as dictionary values. Counts are allowed to be
- any integer value including zero or negative counts. The :class:`Counter`
- class is similar to bags or multisets in other languages.
+ A :class:`Counter` is a :class:`dict` subclass for counting hashable objects.
+ It is an unordered collection where elements are stored as dictionary keys
+ and their counts are stored as dictionary values. Counts are allowed to be
+ any integer value including zero or negative counts. The :class:`Counter`
+ class is similar to bags or multisets in other languages.
- Elements are counted from an *iterable* or initialized from another
- *mapping* (or counter):
+ Elements are counted from an *iterable* or initialized from another
+ *mapping* (or counter):
>>> c = Counter() # a new, empty counter
>>> c = Counter('gallahad') # a new counter from an iterable
>>> c = Counter({'red': 4, 'blue': 2}) # a new counter from a mapping
>>> c = Counter(cats=4, dogs=8) # a new counter from keyword args
- Counter objects have a dictionary interface except that they return a zero
- count for missing items instead of raising a :exc:`KeyError`:
+ Counter objects have a dictionary interface except that they return a zero
+ count for missing items instead of raising a :exc:`KeyError`:
>>> c = Counter(['eggs', 'ham'])
>>> c['bacon'] # count of a missing element is zero
0
- Setting a count to zero does not remove an element from a counter.
- Use ``del`` to remove it entirely:
+ Setting a count to zero does not remove an element from a counter.
+ Use ``del`` to remove it entirely:
>>> c['sausage'] = 0 # counter entry with a zero count
>>> del c['sausage'] # del actually removes the entry
- .. versionadded:: 3.1
+ .. versionadded:: 3.1
- Counter objects support three methods beyond those available for all
- dictionaries:
+ Counter objects support three methods beyond those available for all
+ dictionaries:
- .. method:: elements()
+ .. method:: elements()
- Return an iterator over elements repeating each as many times as its
- count. Elements are returned in arbitrary order. If an element's count
- is less than one, :meth:`elements` will ignore it.
+ Return an iterator over elements repeating each as many times as its
+ count. Elements are returned in arbitrary order. If an element's count
+ is less than one, :meth:`elements` will ignore it.
>>> c = Counter(a=4, b=2, c=0, d=-2)
>>> list(c.elements())
['a', 'a', 'a', 'a', 'b', 'b']
- .. method:: most_common([n])
+ .. method:: most_common([n])
- Return a list of the *n* most common elements and their counts from the
- most common to the least. If *n* is not specified, :func:`most_common`
- returns *all* elements in the counter. Elements with equal counts are
- ordered arbitrarily:
+ Return a list of the *n* most common elements and their counts from the
+ most common to the least. If *n* is not specified, :meth:`most_common`
+ returns *all* elements in the counter. Elements with equal counts are
+ ordered arbitrarily:
>>> Counter('abracadabra').most_common(3)
[('a', 5), ('r', 2), ('b', 2)]
- .. method:: subtract([iterable-or-mapping])
+ .. method:: subtract([iterable-or-mapping])
- Elements are subtracted from an *iterable* or from another *mapping*
- (or counter). Like :meth:`dict.update` but subtracts counts instead
- of replacing them. Both inputs and outputs may be zero or negative.
+ Elements are subtracted from an *iterable* or from another *mapping*
+ (or counter). Like :meth:`dict.update` but subtracts counts instead
+ of replacing them. Both inputs and outputs may be zero or negative.
>>> c = Counter(a=4, b=2, c=0, d=-2)
>>> d = Counter(a=1, b=2, c=3, d=4)
>>> c.subtract(d)
Counter({'a': 3, 'b': 0, 'c': -3, 'd': -6})
- .. versionadded:: 3.2
+ .. versionadded:: 3.2
- The usual dictionary methods are available for :class:`Counter` objects
- except for two which work differently for counters.
+ The usual dictionary methods are available for :class:`Counter` objects
+ except for two which work differently for counters.
- .. method:: fromkeys(iterable)
+ .. method:: fromkeys(iterable)
- This class method is not implemented for :class:`Counter` objects.
+ This class method is not implemented for :class:`Counter` objects.
- .. method:: update([iterable-or-mapping])
+ .. method:: update([iterable-or-mapping])
- Elements are counted from an *iterable* or added-in from another
- *mapping* (or counter). Like :meth:`dict.update` but adds counts
- instead of replacing them. Also, the *iterable* is expected to be a
- sequence of elements, not a sequence of ``(key, value)`` pairs.
+ Elements are counted from an *iterable* or added-in from another
+ *mapping* (or counter). Like :meth:`dict.update` but adds counts
+ instead of replacing them. Also, the *iterable* is expected to be a
+ sequence of elements, not a sequence of ``(key, value)`` pairs.
Common patterns for working with :class:`Counter` objects::
@@ -149,7 +307,7 @@ Common patterns for working with :class:`Counter` objects::
c.items() # convert to a list of (elem, cnt) pairs
Counter(dict(list_of_pairs)) # convert from a list of (elem, cnt) pairs
c.most_common()[:-n:-1] # n least common elements
- c += Counter() # remove zero and negative counts
+ +c # remove zero and negative counts
Several mathematical operations are provided for combining :class:`Counter`
objects to produce multisets (counters that have counts greater than zero).
@@ -169,32 +327,44 @@ counts, but the output will exclude results with counts of zero or less.
>>> c | d # union: max(c[x], d[x])
Counter({'a': 3, 'b': 2})
+Unary addition and subtraction are shortcuts for adding an empty counter
+or subtracting from an empty counter.
+
+ >>> c = Counter(a=2, b=-4)
+ >>> +c
+ Counter({'a': 2})
+ >>> -c
+ Counter({'b': 4})
+
+.. versionadded:: 3.3
+ Added support for unary plus, unary minus, and in-place multiset operations.
+
.. note::
- Counters were primarily designed to work with positive integers to represent
- running counts; however, care was taken to not unnecessarily preclude use
- cases needing other types or negative values. To help with those use cases,
- this section documents the minimum range and type restrictions.
+ Counters were primarily designed to work with positive integers to represent
+ running counts; however, care was taken to not unnecessarily preclude use
+ cases needing other types or negative values. To help with those use cases,
+ this section documents the minimum range and type restrictions.
- * The :class:`Counter` class itself is a dictionary subclass with no
- restrictions on its keys and values. The values are intended to be numbers
- representing counts, but you *could* store anything in the value field.
+ * The :class:`Counter` class itself is a dictionary subclass with no
+ restrictions on its keys and values. The values are intended to be numbers
+ representing counts, but you *could* store anything in the value field.
- * The :meth:`most_common` method requires only that the values be orderable.
+ * The :meth:`most_common` method requires only that the values be orderable.
- * For in-place operations such as ``c[key] += 1``, the value type need only
- support addition and subtraction. So fractions, floats, and decimals would
- work and negative values are supported. The same is also true for
- :meth:`update` and :meth:`subtract` which allow negative and zero values
- for both inputs and outputs.
+ * For in-place operations such as ``c[key] += 1``, the value type need only
+ support addition and subtraction. So fractions, floats, and decimals would
+ work and negative values are supported. The same is also true for
+ :meth:`update` and :meth:`subtract` which allow negative and zero values
+ for both inputs and outputs.
- * The multiset methods are designed only for use cases with positive values.
- The inputs may be negative or zero, but only outputs with positive values
- are created. There are no type restrictions, but the value type needs to
- support addition, subtraction, and comparison.
+ * The multiset methods are designed only for use cases with positive values.
+ The inputs may be negative or zero, but only outputs with positive values
+ are created. There are no type restrictions, but the value type needs to
+ support addition, subtraction, and comparison.
- * The :meth:`elements` method requires integer counts. It ignores zero and
- negative counts.
+ * The :meth:`elements` method requires integer counts. It ignores zero and
+ negative counts.
.. seealso::
@@ -217,7 +387,7 @@ counts, but the output will exclude results with counts of zero or less.
* To enumerate all distinct multisets of a given size over a given set of
elements, see :func:`itertools.combinations_with_replacement`.
- map(Counter, combinations_with_replacement('ABC', 2)) --> AA AB AC BB BC CC
+ map(Counter, combinations_with_replacement('ABC', 2)) --> AA AB AC BB BC CC
:class:`deque` objects
@@ -225,105 +395,105 @@ counts, but the output will exclude results with counts of zero or less.
.. class:: deque([iterable, [maxlen]])
- Returns a new deque object initialized left-to-right (using :meth:`append`) with
- data from *iterable*. If *iterable* is not specified, the new deque is empty.
+ Returns a new deque object initialized left-to-right (using :meth:`append`) with
+ data from *iterable*. If *iterable* is not specified, the new deque is empty.
- Deques are a generalization of stacks and queues (the name is pronounced "deck"
- and is short for "double-ended queue"). Deques support thread-safe, memory
- efficient appends and pops from either side of the deque with approximately the
- same O(1) performance in either direction.
+ Deques are a generalization of stacks and queues (the name is pronounced "deck"
+ and is short for "double-ended queue"). Deques support thread-safe, memory
+ efficient appends and pops from either side of the deque with approximately the
+ same O(1) performance in either direction.
- Though :class:`list` objects support similar operations, they are optimized for
- fast fixed-length operations and incur O(n) memory movement costs for
- ``pop(0)`` and ``insert(0, v)`` operations which change both the size and
- position of the underlying data representation.
+ Though :class:`list` objects support similar operations, they are optimized for
+ fast fixed-length operations and incur O(n) memory movement costs for
+ ``pop(0)`` and ``insert(0, v)`` operations which change both the size and
+ position of the underlying data representation.
- If *maxlen* is not specified or is *None*, deques may grow to an
- arbitrary length. Otherwise, the deque is bounded to the specified maximum
- length. Once a bounded length deque is full, when new items are added, a
- corresponding number of items are discarded from the opposite end. Bounded
- length deques provide functionality similar to the ``tail`` filter in
- Unix. They are also useful for tracking transactions and other pools of data
- where only the most recent activity is of interest.
+ If *maxlen* is not specified or is *None*, deques may grow to an
+ arbitrary length. Otherwise, the deque is bounded to the specified maximum
+ length. Once a bounded length deque is full, when new items are added, a
+ corresponding number of items are discarded from the opposite end. Bounded
+ length deques provide functionality similar to the ``tail`` filter in
+ Unix. They are also useful for tracking transactions and other pools of data
+ where only the most recent activity is of interest.
- Deque objects support the following methods:
+ Deque objects support the following methods:
- .. method:: append(x)
+ .. method:: append(x)
- Add *x* to the right side of the deque.
+ Add *x* to the right side of the deque.
- .. method:: appendleft(x)
+ .. method:: appendleft(x)
- Add *x* to the left side of the deque.
+ Add *x* to the left side of the deque.
- .. method:: clear()
+ .. method:: clear()
- Remove all elements from the deque leaving it with length 0.
+ Remove all elements from the deque leaving it with length 0.
- .. method:: count(x)
+ .. method:: count(x)
- Count the number of deque elements equal to *x*.
+ Count the number of deque elements equal to *x*.
- .. versionadded:: 3.2
+ .. versionadded:: 3.2
- .. method:: extend(iterable)
+ .. method:: extend(iterable)
- Extend the right side of the deque by appending elements from the iterable
- argument.
+ Extend the right side of the deque by appending elements from the iterable
+ argument.
- .. method:: extendleft(iterable)
+ .. method:: extendleft(iterable)
- Extend the left side of the deque by appending elements from *iterable*.
- Note, the series of left appends results in reversing the order of
- elements in the iterable argument.
+ Extend the left side of the deque by appending elements from *iterable*.
+ Note, the series of left appends results in reversing the order of
+ elements in the iterable argument.
- .. method:: pop()
+ .. method:: pop()
- Remove and return an element from the right side of the deque. If no
- elements are present, raises an :exc:`IndexError`.
+ Remove and return an element from the right side of the deque. If no
+ elements are present, raises an :exc:`IndexError`.
- .. method:: popleft()
+ .. method:: popleft()
- Remove and return an element from the left side of the deque. If no
- elements are present, raises an :exc:`IndexError`.
+ Remove and return an element from the left side of the deque. If no
+ elements are present, raises an :exc:`IndexError`.
- .. method:: remove(value)
+ .. method:: remove(value)
- Removed the first occurrence of *value*. If not found, raises a
- :exc:`ValueError`.
+ Remove the first occurrence of *value*. If not found, raises a
+ :exc:`ValueError`.
- .. method:: reverse()
+ .. method:: reverse()
- Reverse the elements of the deque in-place and then return ``None``.
+ Reverse the elements of the deque in-place and then return ``None``.
- .. versionadded:: 3.2
+ .. versionadded:: 3.2
- .. method:: rotate(n)
+ .. method:: rotate(n)
- Rotate the deque *n* steps to the right. If *n* is negative, rotate to
- the left. Rotating one step to the right is equivalent to:
- ``d.appendleft(d.pop())``.
+ Rotate the deque *n* steps to the right. If *n* is negative, rotate to
+ the left. Rotating one step to the right is equivalent to:
+ ``d.appendleft(d.pop())``.
- Deque objects also provide one read-only attribute:
+ Deque objects also provide one read-only attribute:
- .. attribute:: maxlen
+ .. attribute:: maxlen
- Maximum size of a deque or *None* if unbounded.
+ Maximum size of a deque or *None* if unbounded.
- .. versionadded:: 3.1
+ .. versionadded:: 3.1
In addition to the above, deques support iteration, pickling, ``len(d)``,
@@ -336,56 +506,56 @@ Example:
.. doctest::
- >>> from collections import deque
- >>> d = deque('ghi') # make a new deque with three items
- >>> for elem in d: # iterate over the deque's elements
- ... print(elem.upper())
- G
- H
- I
-
- >>> d.append('j') # add a new entry to the right side
- >>> d.appendleft('f') # add a new entry to the left side
- >>> d # show the representation of the deque
- deque(['f', 'g', 'h', 'i', 'j'])
-
- >>> d.pop() # return and remove the rightmost item
- 'j'
- >>> d.popleft() # return and remove the leftmost item
- 'f'
- >>> list(d) # list the contents of the deque
- ['g', 'h', 'i']
- >>> d[0] # peek at leftmost item
- 'g'
- >>> d[-1] # peek at rightmost item
- 'i'
-
- >>> list(reversed(d)) # list the contents of a deque in reverse
- ['i', 'h', 'g']
- >>> 'h' in d # search the deque
- True
- >>> d.extend('jkl') # add multiple elements at once
- >>> d
- deque(['g', 'h', 'i', 'j', 'k', 'l'])
- >>> d.rotate(1) # right rotation
- >>> d
- deque(['l', 'g', 'h', 'i', 'j', 'k'])
- >>> d.rotate(-1) # left rotation
- >>> d
- deque(['g', 'h', 'i', 'j', 'k', 'l'])
-
- >>> deque(reversed(d)) # make a new deque in reverse order
- deque(['l', 'k', 'j', 'i', 'h', 'g'])
- >>> d.clear() # empty the deque
- >>> d.pop() # cannot pop from an empty deque
- Traceback (most recent call last):
- File "<pyshell#6>", line 1, in -toplevel-
- d.pop()
- IndexError: pop from an empty deque
-
- >>> d.extendleft('abc') # extendleft() reverses the input order
- >>> d
- deque(['c', 'b', 'a'])
+ >>> from collections import deque
+ >>> d = deque('ghi') # make a new deque with three items
+ >>> for elem in d: # iterate over the deque's elements
+ ... print(elem.upper())
+ G
+ H
+ I
+
+ >>> d.append('j') # add a new entry to the right side
+ >>> d.appendleft('f') # add a new entry to the left side
+ >>> d # show the representation of the deque
+ deque(['f', 'g', 'h', 'i', 'j'])
+
+ >>> d.pop() # return and remove the rightmost item
+ 'j'
+ >>> d.popleft() # return and remove the leftmost item
+ 'f'
+ >>> list(d) # list the contents of the deque
+ ['g', 'h', 'i']
+ >>> d[0] # peek at leftmost item
+ 'g'
+ >>> d[-1] # peek at rightmost item
+ 'i'
+
+ >>> list(reversed(d)) # list the contents of a deque in reverse
+ ['i', 'h', 'g']
+ >>> 'h' in d # search the deque
+ True
+ >>> d.extend('jkl') # add multiple elements at once
+ >>> d
+ deque(['g', 'h', 'i', 'j', 'k', 'l'])
+ >>> d.rotate(1) # right rotation
+ >>> d
+ deque(['l', 'g', 'h', 'i', 'j', 'k'])
+ >>> d.rotate(-1) # left rotation
+ >>> d
+ deque(['g', 'h', 'i', 'j', 'k', 'l'])
+
+ >>> deque(reversed(d)) # make a new deque in reverse order
+ deque(['l', 'k', 'j', 'i', 'h', 'g'])
+ >>> d.clear() # empty the deque
+ >>> d.pop() # cannot pop from an empty deque
+ Traceback (most recent call last):
+ File "<pyshell#6>", line 1, in -toplevel-
+ d.pop()
+ IndexError: pop from an empty deque
+
+ >>> d.extendleft('abc') # extendleft() reverses the input order
+ >>> d
+ deque(['c', 'b', 'a'])
:class:`deque` Recipes
@@ -396,9 +566,10 @@ This section shows various approaches to working with deques.
Bounded length deques provide functionality similar to the ``tail`` filter
in Unix::
- def tail(filename, n=10):
- 'Return the last n lines of a file'
- return deque(open(filename), n)
+ def tail(filename, n=10):
+ 'Return the last n lines of a file'
+ with open(filename) as f:
+ return deque(f, n)
Another approach to using deques is to maintain a sequence of recently
added elements by appending to the right and popping to the left::
@@ -419,10 +590,10 @@ The :meth:`rotate` method provides a way to implement :class:`deque` slicing and
deletion. For example, a pure Python implementation of ``del d[n]`` relies on
the :meth:`rotate` method to position elements to be popped::
- def delete_nth(d, n):
- d.rotate(-n)
- d.popleft()
- d.rotate(n)
+ def delete_nth(d, n):
+ d.rotate(-n)
+ d.popleft()
+ d.rotate(n)
To implement :class:`deque` slicing, use a similar approach applying
:meth:`rotate` to bring a target element to the left side of the deque. Remove
@@ -438,50 +609,50 @@ stack manipulations such as ``dup``, ``drop``, ``swap``, ``over``, ``pick``,
.. class:: defaultdict([default_factory[, ...]])
- Returns a new dictionary-like object. :class:`defaultdict` is a subclass of the
- built-in :class:`dict` class. It overrides one method and adds one writable
- instance variable. The remaining functionality is the same as for the
- :class:`dict` class and is not documented here.
+ Returns a new dictionary-like object. :class:`defaultdict` is a subclass of the
+ built-in :class:`dict` class. It overrides one method and adds one writable
+ instance variable. The remaining functionality is the same as for the
+ :class:`dict` class and is not documented here.
- The first argument provides the initial value for the :attr:`default_factory`
- attribute; it defaults to ``None``. All remaining arguments are treated the same
- as if they were passed to the :class:`dict` constructor, including keyword
- arguments.
+ The first argument provides the initial value for the :attr:`default_factory`
+ attribute; it defaults to ``None``. All remaining arguments are treated the same
+ as if they were passed to the :class:`dict` constructor, including keyword
+ arguments.
- :class:`defaultdict` objects support the following method in addition to the
- standard :class:`dict` operations:
+ :class:`defaultdict` objects support the following method in addition to the
+ standard :class:`dict` operations:
- .. method:: __missing__(key)
+ .. method:: __missing__(key)
- If the :attr:`default_factory` attribute is ``None``, this raises a
- :exc:`KeyError` exception with the *key* as argument.
+ If the :attr:`default_factory` attribute is ``None``, this raises a
+ :exc:`KeyError` exception with the *key* as argument.
- If :attr:`default_factory` is not ``None``, it is called without arguments
- to provide a default value for the given *key*, this value is inserted in
- the dictionary for the *key*, and returned.
+ If :attr:`default_factory` is not ``None``, it is called without arguments
+ to provide a default value for the given *key*, this value is inserted in
+ the dictionary for the *key*, and returned.
- If calling :attr:`default_factory` raises an exception this exception is
- propagated unchanged.
+ If calling :attr:`default_factory` raises an exception this exception is
+ propagated unchanged.
- This method is called by the :meth:`__getitem__` method of the
- :class:`dict` class when the requested key is not found; whatever it
- returns or raises is then returned or raised by :meth:`__getitem__`.
+ This method is called by the :meth:`__getitem__` method of the
+ :class:`dict` class when the requested key is not found; whatever it
+ returns or raises is then returned or raised by :meth:`__getitem__`.
- Note that :meth:`__missing__` is *not* called for any operations besides
- :meth:`__getitem__`. This means that :meth:`get` will, like normal
- dictionaries, return ``None`` as a default rather than using
- :attr:`default_factory`.
+ Note that :meth:`__missing__` is *not* called for any operations besides
+ :meth:`__getitem__`. This means that :meth:`get` will, like normal
+ dictionaries, return ``None`` as a default rather than using
+ :attr:`default_factory`.
- :class:`defaultdict` objects support the following instance variable:
+ :class:`defaultdict` objects support the following instance variable:
- .. attribute:: default_factory
+ .. attribute:: default_factory
- This attribute is used by the :meth:`__missing__` method; it is
- initialized from the first argument to the constructor, if present, or to
- ``None``, if absent.
+ This attribute is used by the :meth:`__missing__` method; it is
+ initialized from the first argument to the constructor, if present, or to
+ ``None``, if absent.
:class:`defaultdict` Examples
@@ -490,13 +661,13 @@ stack manipulations such as ``dup``, ``drop``, ``swap``, ``over``, ``pick``,
Using :class:`list` as the :attr:`default_factory`, it is easy to group a
sequence of key-value pairs into a dictionary of lists:
- >>> s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
- >>> d = defaultdict(list)
- >>> for k, v in s:
- ... d[k].append(v)
- ...
- >>> list(d.items())
- [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
+ >>> s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
+ >>> d = defaultdict(list)
+ >>> for k, v in s:
+ ... d[k].append(v)
+ ...
+ >>> list(d.items())
+ [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
When each key is encountered for the first time, it is not already in the
mapping; so an entry is automatically created using the :attr:`default_factory`
@@ -506,24 +677,24 @@ again, the look-up proceeds normally (returning the list for that key) and the
:meth:`list.append` operation adds another value to the list. This technique is
simpler and faster than an equivalent technique using :meth:`dict.setdefault`:
- >>> d = {}
- >>> for k, v in s:
- ... d.setdefault(k, []).append(v)
- ...
- >>> list(d.items())
- [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
+ >>> d = {}
+ >>> for k, v in s:
+ ... d.setdefault(k, []).append(v)
+ ...
+ >>> list(d.items())
+ [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
Setting the :attr:`default_factory` to :class:`int` makes the
:class:`defaultdict` useful for counting (like a bag or multiset in other
languages):
- >>> s = 'mississippi'
- >>> d = defaultdict(int)
- >>> for k in s:
- ... d[k] += 1
- ...
- >>> list(d.items())
- [('i', 4), ('p', 2), ('s', 4), ('m', 1)]
+ >>> s = 'mississippi'
+ >>> d = defaultdict(int)
+ >>> for k in s:
+ ... d[k] += 1
+ ...
+ >>> list(d.items())
+ [('i', 4), ('p', 2), ('s', 4), ('m', 1)]
When a letter is first encountered, it is missing from the mapping, so the
:attr:`default_factory` function calls :func:`int` to supply a default count of
@@ -534,23 +705,23 @@ constant functions. A faster and more flexible way to create constant functions
is to use a lambda function which can supply any constant value (not just
zero):
- >>> def constant_factory(value):
- ... return lambda: value
- >>> d = defaultdict(constant_factory('<missing>'))
- >>> d.update(name='John', action='ran')
- >>> '%(name)s %(action)s to %(object)s' % d
- 'John ran to <missing>'
+ >>> def constant_factory(value):
+ ... return lambda: value
+ >>> d = defaultdict(constant_factory('<missing>'))
+ >>> d.update(name='John', action='ran')
+ >>> '%(name)s %(action)s to %(object)s' % d
+ 'John ran to <missing>'
Setting the :attr:`default_factory` to :class:`set` makes the
:class:`defaultdict` useful for building a dictionary of sets:
- >>> s = [('red', 1), ('blue', 2), ('red', 3), ('blue', 4), ('red', 1), ('blue', 4)]
- >>> d = defaultdict(set)
- >>> for k, v in s:
- ... d[k].add(v)
- ...
- >>> list(d.items())
- [('blue', set([2, 4])), ('red', set([1, 3]))]
+ >>> s = [('red', 1), ('blue', 2), ('red', 3), ('blue', 4), ('red', 1), ('blue', 4)]
+ >>> d = defaultdict(set)
+ >>> for k, v in s:
+ ... d[k].add(v)
+ ...
+ >>> list(d.items())
+ [('blue', {2, 4}), ('red', {1, 3})]
:func:`namedtuple` Factory Function for Tuples with Named Fields
@@ -562,168 +733,131 @@ they add the ability to access fields by name instead of position index.
.. function:: namedtuple(typename, field_names, verbose=False, rename=False)
- Returns a new tuple subclass named *typename*. The new subclass is used to
- create tuple-like objects that have fields accessible by attribute lookup as
- well as being indexable and iterable. Instances of the subclass also have a
- helpful docstring (with typename and field_names) and a helpful :meth:`__repr__`
- method which lists the tuple contents in a ``name=value`` format.
+ Returns a new tuple subclass named *typename*. The new subclass is used to
+ create tuple-like objects that have fields accessible by attribute lookup as
+ well as being indexable and iterable. Instances of the subclass also have a
+ helpful docstring (with typename and field_names) and a helpful :meth:`__repr__`
+ method which lists the tuple contents in a ``name=value`` format.
- The *field_names* are a single string with each fieldname separated by whitespace
- and/or commas, for example ``'x y'`` or ``'x, y'``. Alternatively, *field_names*
- can be a sequence of strings such as ``['x', 'y']``.
+ The *field_names* are a single string with each fieldname separated by whitespace
+ and/or commas, for example ``'x y'`` or ``'x, y'``. Alternatively, *field_names*
+ can be a sequence of strings such as ``['x', 'y']``.
- Any valid Python identifier may be used for a fieldname except for names
- starting with an underscore. Valid identifiers consist of letters, digits,
- and underscores but do not start with a digit or underscore and cannot be
- a :mod:`keyword` such as *class*, *for*, *return*, *global*, *pass*,
- or *raise*.
+ Any valid Python identifier may be used for a fieldname except for names
+ starting with an underscore. Valid identifiers consist of letters, digits,
+ and underscores but do not start with a digit or underscore and cannot be
+ a :mod:`keyword` such as *class*, *for*, *return*, *global*, *pass*,
+ or *raise*.
- If *rename* is true, invalid fieldnames are automatically replaced
- with positional names. For example, ``['abc', 'def', 'ghi', 'abc']`` is
- converted to ``['abc', '_1', 'ghi', '_3']``, eliminating the keyword
- ``def`` and the duplicate fieldname ``abc``.
+ If *rename* is true, invalid fieldnames are automatically replaced
+ with positional names. For example, ``['abc', 'def', 'ghi', 'abc']`` is
+ converted to ``['abc', '_1', 'ghi', '_3']``, eliminating the keyword
+ ``def`` and the duplicate fieldname ``abc``.
- If *verbose* is true, the class definition is printed just before being built.
+ If *verbose* is true, the class definition is printed after it is
+ built. This option is outdated; instead, it is simpler to print the
+ :attr:`_source` attribute.
- Named tuple instances do not have per-instance dictionaries, so they are
- lightweight and require no more memory than regular tuples.
+ Named tuple instances do not have per-instance dictionaries, so they are
+ lightweight and require no more memory than regular tuples.
- .. versionchanged:: 3.1
- Added support for *rename*.
+ .. versionchanged:: 3.1
+ Added support for *rename*.
.. doctest::
- :options: +NORMALIZE_WHITESPACE
-
- >>> # Basic example
- >>> Point = namedtuple('Point', ['x', 'y'])
- >>> p = Point(x=10, y=11)
-
- >>> # Example using the verbose option to print the class definition
- >>> Point = namedtuple('Point', 'x y', verbose=True)
- class Point(tuple):
- 'Point(x, y)'
- <BLANKLINE>
- __slots__ = ()
- <BLANKLINE>
- _fields = ('x', 'y')
- <BLANKLINE>
- def __new__(_cls, x, y):
- 'Create a new instance of Point(x, y)'
- return _tuple.__new__(_cls, (x, y))
- <BLANKLINE>
- @classmethod
- def _make(cls, iterable, new=tuple.__new__, len=len):
- 'Make a new Point object from a sequence or iterable'
- result = new(cls, iterable)
- if len(result) != 2:
- raise TypeError('Expected 2 arguments, got %d' % len(result))
- return result
- <BLANKLINE>
- def __repr__(self):
- 'Return a nicely formatted representation string'
- return self.__class__.__name__ + '(x=%r, y=%r)' % self
- <BLANKLINE>
- def _asdict(self):
- 'Return a new OrderedDict which maps field names to their values'
- return OrderedDict(zip(self._fields, self))
- <BLANKLINE>
- __dict__ = property(_asdict)
- <BLANKLINE>
- def _replace(_self, **kwds):
- 'Return a new Point object replacing specified fields with new values'
- result = _self._make(map(kwds.pop, ('x', 'y'), _self))
- if kwds:
- raise ValueError('Got unexpected field names: %r' % list(kwds.keys()))
- return result
- <BLANKLINE>
- def __getnewargs__(self):
- 'Return self as a plain tuple. Used by copy and pickle.'
- return tuple(self)
- <BLANKLINE>
- x = _property(_itemgetter(0), doc='Alias for field number 0')
- y = _property(_itemgetter(1), doc='Alias for field number 1')
-
- >>> p = Point(11, y=22) # instantiate with positional or keyword arguments
- >>> p[0] + p[1] # indexable like the plain tuple (11, 22)
- 33
- >>> x, y = p # unpack like a regular tuple
- >>> x, y
- (11, 22)
- >>> p.x + p.y # fields also accessible by name
- 33
- >>> p # readable __repr__ with a name=value style
- Point(x=11, y=22)
+ :options: +NORMALIZE_WHITESPACE
+
+ >>> # Basic example
+ >>> Point = namedtuple('Point', ['x', 'y'])
+ >>> p = Point(11, y=22) # instantiate with positional or keyword arguments
+ >>> p[0] + p[1] # indexable like the plain tuple (11, 22)
+ 33
+ >>> x, y = p # unpack like a regular tuple
+ >>> x, y
+ (11, 22)
+ >>> p.x + p.y # fields also accessible by name
+ 33
+ >>> p # readable __repr__ with a name=value style
+ Point(x=11, y=22)
Named tuples are especially useful for assigning field names to result tuples returned
by the :mod:`csv` or :mod:`sqlite3` modules::
- EmployeeRecord = namedtuple('EmployeeRecord', 'name, age, title, department, paygrade')
+ EmployeeRecord = namedtuple('EmployeeRecord', 'name, age, title, department, paygrade')
- import csv
- for emp in map(EmployeeRecord._make, csv.reader(open("employees.csv", "rb"))):
- print(emp.name, emp.title)
+ import csv
+ for emp in map(EmployeeRecord._make, csv.reader(open("employees.csv", "rb"))):
+ print(emp.name, emp.title)
- import sqlite3
- conn = sqlite3.connect('/companydata')
- cursor = conn.cursor()
- cursor.execute('SELECT name, age, title, department, paygrade FROM employees')
- for emp in map(EmployeeRecord._make, cursor.fetchall()):
- print(emp.name, emp.title)
+ import sqlite3
+ conn = sqlite3.connect('/companydata')
+ cursor = conn.cursor()
+ cursor.execute('SELECT name, age, title, department, paygrade FROM employees')
+ for emp in map(EmployeeRecord._make, cursor.fetchall()):
+ print(emp.name, emp.title)
In addition to the methods inherited from tuples, named tuples support
-three additional methods and one attribute. To prevent conflicts with
+three additional methods and two attributes. To prevent conflicts with
field names, the method and attribute names start with an underscore.
.. classmethod:: somenamedtuple._make(iterable)
- Class method that makes a new instance from an existing sequence or iterable.
+ Class method that makes a new instance from an existing sequence or iterable.
-.. doctest::
+ .. doctest::
- >>> t = [11, 22]
- >>> Point._make(t)
- Point(x=11, y=22)
+ >>> t = [11, 22]
+ >>> Point._make(t)
+ Point(x=11, y=22)
.. method:: somenamedtuple._asdict()
- Return a new :class:`OrderedDict` which maps field names to their corresponding
- values::
+ Return a new :class:`OrderedDict` which maps field names to their corresponding
+ values. Note, this method is no longer needed now that the same effect can
+ be achieved by using the built-in :func:`vars` function::
- >>> p._asdict()
- OrderedDict([('x', 11), ('y', 22)])
+ >>> vars(p)
+ OrderedDict([('x', 11), ('y', 22)])
- .. versionchanged:: 3.1
- Returns an :class:`OrderedDict` instead of a regular :class:`dict`.
+ .. versionchanged:: 3.1
+ Returns an :class:`OrderedDict` instead of a regular :class:`dict`.
.. method:: somenamedtuple._replace(kwargs)
- Return a new instance of the named tuple replacing specified fields with new
- values:
+ Return a new instance of the named tuple replacing specified fields with new
+ values::
+
+ >>> p = Point(x=11, y=22)
+ >>> p._replace(x=33)
+ Point(x=33, y=22)
+
+ >>> for partnum, record in inventory.items():
+ ... inventory[partnum] = record._replace(price=newprices[partnum], timestamp=time.now())
-::
+.. attribute:: somenamedtuple._source
- >>> p = Point(x=11, y=22)
- >>> p._replace(x=33)
- Point(x=33, y=22)
+ A string with the pure Python source code used to create the named
+ tuple class. The source makes the named tuple self-documenting.
+ It can be printed, executed using :func:`exec`, or saved to a file
+ and imported.
- >>> for partnum, record in inventory.items():
- ... inventory[partnum] = record._replace(price=newprices[partnum], timestamp=time.now())
+ .. versionadded:: 3.3
.. attribute:: somenamedtuple._fields
- Tuple of strings listing the field names. Useful for introspection
- and for creating new named tuple types from existing named tuples.
+ Tuple of strings listing the field names. Useful for introspection
+ and for creating new named tuple types from existing named tuples.
-.. doctest::
+ .. doctest::
- >>> p._fields # view the field names
- ('x', 'y')
+ >>> p._fields # view the field names
+ ('x', 'y')
- >>> Color = namedtuple('Color', 'red green blue')
- >>> Pixel = namedtuple('Pixel', Point._fields + Color._fields)
- >>> Pixel(11, 22, 128, 255, 0)
- Pixel(x=11, y=22, red=128, green=255, blue=0)
+ >>> Color = namedtuple('Color', 'red green blue')
+ >>> Pixel = namedtuple('Pixel', Point._fields + Color._fields)
+ >>> Pixel(11, 22, 128, 255, 0)
+ Pixel(x=11, y=22, red=128, green=255, blue=0)
To retrieve a field whose name is stored in a string, use the :func:`getattr`
function:
@@ -734,31 +868,30 @@ function:
To convert a dictionary to a named tuple, use the double-star-operator
(as described in :ref:`tut-unpacking-arguments`):
- >>> d = {'x': 11, 'y': 22}
- >>> Point(**d)
- Point(x=11, y=22)
+ >>> d = {'x': 11, 'y': 22}
+ >>> Point(**d)
+ Point(x=11, y=22)
Since a named tuple is a regular Python class, it is easy to add or change
functionality with a subclass. Here is how to add a calculated field and
a fixed-width print format:
>>> class Point(namedtuple('Point', 'x y')):
- __slots__ = ()
- @property
- def hypot(self):
- return (self.x ** 2 + self.y ** 2) ** 0.5
- def __str__(self):
- return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
+ __slots__ = ()
+ @property
+ def hypot(self):
+ return (self.x ** 2 + self.y ** 2) ** 0.5
+ def __str__(self):
+ return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
>>> for p in Point(3, 4), Point(14, 5/7):
- print(p)
+ print(p)
Point: x= 3.000 y= 4.000 hypot= 5.000
Point: x=14.000 y= 0.714 hypot=14.018
The subclass shown above sets ``__slots__`` to an empty tuple. This helps
keep memory requirements low by preventing the creation of instance dictionaries.
-
Subclassing is not useful for adding new, stored fields. Instead, simply
create a new named tuple type from the :attr:`_fields` attribute:
@@ -770,6 +903,7 @@ customize a prototype instance:
>>> Account = namedtuple('Account', 'owner balance transaction_count')
>>> default_account = Account('<owner name>', 0.0, 0)
>>> johns_account = default_account._replace(owner='John')
+ >>> janes_account = default_account._replace(owner='Jane')
Enumerated constants can be implemented with named tuples, but it is simpler
and more efficient to use a simple class declaration:
@@ -778,19 +912,19 @@ and more efficient to use a simple class declaration:
>>> Status.open, Status.pending, Status.closed
(0, 1, 2)
>>> class Status:
- open, pending, closed = range(3)
+ open, pending, closed = range(3)
.. seealso::
- * `Named tuple recipe <http://code.activestate.com/recipes/500261/>`_
- adapted for Python 2.4.
+ * `Named tuple recipe <http://code.activestate.com/recipes/500261/>`_
+ adapted for Python 2.4.
- * `Recipe for named tuple abstract base class with a metaclass mix-in
- <http://code.activestate.com/recipes/577629-namedtupleabc-abstract-base-class-mix-in-for-named/>`_
- by Jan Kaliszewski. Besides providing an :term:`abstract base class` for
- named tuples, it also supports an alternate :term:`metaclass`-based
- constructor that is convenient for use cases where named tuples are being
- subclassed.
+ * `Recipe for named tuple abstract base class with a metaclass mix-in
+ <http://code.activestate.com/recipes/577629-namedtupleabc-abstract-base-class-mix-in-for-named/>`_
+ by Jan Kaliszewski. Besides providing an :term:`abstract base class` for
+ named tuples, it also supports an alternate :term:`metaclass`-based
+ constructor that is convenient for use cases where named tuples are being
+ subclassed.
:class:`OrderedDict` objects
@@ -802,36 +936,36 @@ the items are returned in the order their keys were first added.
.. class:: OrderedDict([items])
- Return an instance of a dict subclass, supporting the usual :class:`dict`
- methods. An *OrderedDict* is a dict that remembers the order that keys
- were first inserted. If a new entry overwrites an existing entry, the
- original insertion position is left unchanged. Deleting an entry and
- reinserting it will move it to the end.
+ Return an instance of a dict subclass, supporting the usual :class:`dict`
+ methods. An *OrderedDict* is a dict that remembers the order that keys
+ were first inserted. If a new entry overwrites an existing entry, the
+ original insertion position is left unchanged. Deleting an entry and
+ reinserting it will move it to the end.
- .. versionadded:: 3.1
+ .. versionadded:: 3.1
- .. method:: popitem(last=True)
+ .. method:: popitem(last=True)
- The :meth:`popitem` method for ordered dictionaries returns and removes a
- (key, value) pair. The pairs are returned in LIFO order if *last* is true
- or FIFO order if false.
+ The :meth:`popitem` method for ordered dictionaries returns and removes a
+ (key, value) pair. The pairs are returned in LIFO order if *last* is true
+ or FIFO order if false.
- .. method:: move_to_end(key, last=True)
+ .. method:: move_to_end(key, last=True)
- Move an existing *key* to either end of an ordered dictionary. The item
- is moved to the right end if *last* is true (the default) or to the
- beginning if *last* is false. Raises :exc:`KeyError` if the *key* does
- not exist::
+ Move an existing *key* to either end of an ordered dictionary. The item
+ is moved to the right end if *last* is true (the default) or to the
+ beginning if *last* is false. Raises :exc:`KeyError` if the *key* does
+ not exist::
- >>> d = OrderedDict.fromkeys('abcde')
- >>> d.move_to_end('b')
- >>> ''.join(d.keys())
- 'acdeb'
- >>> d.move_to_end('b', last=False)
- >>> ''.join(d.keys())
- 'bacde'
+ >>> d = OrderedDict.fromkeys('abcde')
+ >>> d.move_to_end('b')
+ >>> ''.join(d.keys())
+ 'acdeb'
+ >>> d.move_to_end('b', last=False)
+ >>> ''.join(d.keys())
+ 'bacde'
- .. versionadded:: 3.2
+ .. versionadded:: 3.2
In addition to the usual mapping methods, ordered dictionaries also support
reverse iteration using :func:`reversed`.
@@ -849,8 +983,8 @@ semantics pass-in keyword arguments using a regular unordered dictionary.
.. seealso::
- `Equivalent OrderedDict recipe <http://code.activestate.com/recipes/576693/>`_
- that runs on Python 2.4 or later.
+ `Equivalent OrderedDict recipe <http://code.activestate.com/recipes/576693/>`_
+ that runs on Python 2.4 or later.
:class:`OrderedDict` Examples and Recipes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -893,7 +1027,7 @@ original insertion position is changed and moved to the end::
An ordered dictionary can be combined with the :class:`Counter` class
so that the counter remembers the order elements are first encountered::
- class OrderedCounter(Counter, OrderedDict):
+ class OrderedCounter(Counter, OrderedDict):
'Counter that remembers the order elements are first encountered'
def __repr__(self):
@@ -914,19 +1048,19 @@ attribute.
.. class:: UserDict([initialdata])
- Class that simulates a dictionary. The instance's contents are kept in a
- regular dictionary, which is accessible via the :attr:`data` attribute of
- :class:`UserDict` instances. If *initialdata* is provided, :attr:`data` is
- initialized with its contents; note that a reference to *initialdata* will not
- be kept, allowing it be used for other purposes.
+ Class that simulates a dictionary. The instance's contents are kept in a
+ regular dictionary, which is accessible via the :attr:`data` attribute of
+ :class:`UserDict` instances. If *initialdata* is provided, :attr:`data` is
+ initialized with its contents; note that a reference to *initialdata* will not
+ be kept, allowing it to be used for other purposes.
- In addition to supporting the methods and operations of mappings,
- :class:`UserDict` instances provide the following attribute:
+ In addition to supporting the methods and operations of mappings,
+ :class:`UserDict` instances provide the following attribute:
- .. attribute:: data
+ .. attribute:: data
- A real dictionary used to store the contents of the :class:`UserDict`
- class.
+ A real dictionary used to store the contents of the :class:`UserDict`
+ class.
@@ -944,19 +1078,19 @@ to work with because the underlying list is accessible as an attribute.
.. class:: UserList([list])
- Class that simulates a list. The instance's contents are kept in a regular
- list, which is accessible via the :attr:`data` attribute of :class:`UserList`
- instances. The instance's contents are initially set to a copy of *list*,
- defaulting to the empty list ``[]``. *list* can be any iterable, for
- example a real Python list or a :class:`UserList` object.
+ Class that simulates a list. The instance's contents are kept in a regular
+ list, which is accessible via the :attr:`data` attribute of :class:`UserList`
+ instances. The instance's contents are initially set to a copy of *list*,
+ defaulting to the empty list ``[]``. *list* can be any iterable, for
+ example a real Python list or a :class:`UserList` object.
- In addition to supporting the methods and operations of mutable sequences,
- :class:`UserList` instances provide the following attribute:
+ In addition to supporting the methods and operations of mutable sequences,
+ :class:`UserList` instances provide the following attribute:
- .. attribute:: data
+ .. attribute:: data
- A real :class:`list` object used to store the contents of the
- :class:`UserList` class.
+ A real :class:`list` object used to store the contents of the
+ :class:`UserList` class.
**Subclassing requirements:** Subclasses of :class:`UserList` are expected to
offer a constructor which can be called with either no arguments or one
@@ -981,168 +1115,10 @@ attribute.
.. class:: UserString([sequence])
- Class that simulates a string or a Unicode string object. The instance's
- content is kept in a regular string object, which is accessible via the
- :attr:`data` attribute of :class:`UserString` instances. The instance's
- contents are initially set to a copy of *sequence*. The *sequence* can
- be an instance of :class:`bytes`, :class:`str`, :class:`UserString` (or a
- subclass) or an arbitrary sequence which can be converted into a string using
- the built-in :func:`str` function.
-
-
-.. _collections-abstract-base-classes:
-
-ABCs - abstract base classes
-----------------------------
-
-The collections module offers the following :term:`ABCs <abstract base class>`:
-
-========================= ===================== ====================== ====================================================
-ABC Inherits from Abstract Methods Mixin Methods
-========================= ===================== ====================== ====================================================
-:class:`Container` ``__contains__``
-:class:`Hashable` ``__hash__``
-:class:`Iterable` ``__iter__``
-:class:`Iterator` :class:`Iterable` ``__next__`` ``__iter__``
-:class:`Sized` ``__len__``
-:class:`Callable` ``__call__``
-
-:class:`Sequence` :class:`Sized`, ``__getitem__`` ``__contains__``, ``__iter__``, ``__reversed__``,
- :class:`Iterable`, ``index``, and ``count``
- :class:`Container`
-
-:class:`MutableSequence` :class:`Sequence` ``__setitem__``, Inherited :class:`Sequence` methods and
- ``__delitem__``, ``append``, ``reverse``, ``extend``, ``pop``,
- ``insert`` ``remove``, and ``__iadd__``
-
-:class:`Set` :class:`Sized`, ``__le__``, ``__lt__``, ``__eq__``, ``__ne__``,
- :class:`Iterable`, ``__gt__``, ``__ge__``, ``__and__``, ``__or__``,
- :class:`Container` ``__sub__``, ``__xor__``, and ``isdisjoint``
-
-:class:`MutableSet` :class:`Set` ``add``, Inherited :class:`Set` methods and
- ``discard`` ``clear``, ``pop``, ``remove``, ``__ior__``,
- ``__iand__``, ``__ixor__``, and ``__isub__``
-
-:class:`Mapping` :class:`Sized`, ``__getitem__`` ``__contains__``, ``keys``, ``items``, ``values``,
- :class:`Iterable`, ``get``, ``__eq__``, and ``__ne__``
- :class:`Container`
-
-:class:`MutableMapping` :class:`Mapping` ``__setitem__``, Inherited :class:`Mapping` methods and
- ``__delitem__`` ``pop``, ``popitem``, ``clear``, ``update``,
- and ``setdefault``
-
-
-:class:`MappingView` :class:`Sized` ``__len__``
-:class:`ItemsView` :class:`MappingView`, ``__contains__``,
- :class:`Set` ``__iter__``
-:class:`KeysView` :class:`MappingView`, ``__contains__``,
- :class:`Set` ``__iter__``
-:class:`ValuesView` :class:`MappingView` ``__contains__``, ``__iter__``
-========================= ===================== ====================== ====================================================
-
-
-.. class:: Container
- Hashable
- Sized
- Callable
-
- ABCs for classes that provide respectively the methods :meth:`__contains__`,
- :meth:`__hash__`, :meth:`__len__`, and :meth:`__call__`.
-
-.. class:: Iterable
-
- ABC for classes that provide the :meth:`__iter__` method.
- See also the definition of :term:`iterable`.
-
-.. class:: Iterator
-
- ABC for classes that provide the :meth:`__iter__` and :meth:`next` methods.
- See also the definition of :term:`iterator`.
-
-.. class:: Sequence
- MutableSequence
-
- ABCs for read-only and mutable :term:`sequences <sequence>`.
-
-.. class:: Set
- MutableSet
-
- ABCs for read-only and mutable sets.
-
-.. class:: Mapping
- MutableMapping
-
- ABCs for read-only and mutable :term:`mappings <mapping>`.
-
-.. class:: MappingView
- ItemsView
- KeysView
- ValuesView
-
- ABCs for mapping, items, keys, and values :term:`views <view>`.
-
-
-These ABCs allow us to ask classes or instances if they provide
-particular functionality, for example::
-
- size = None
- if isinstance(myvar, collections.Sized):
- size = len(myvar)
-
-Several of the ABCs are also useful as mixins that make it easier to develop
-classes supporting container APIs. For example, to write a class supporting
-the full :class:`Set` API, it only necessary to supply the three underlying
-abstract methods: :meth:`__contains__`, :meth:`__iter__`, and :meth:`__len__`.
-The ABC supplies the remaining methods such as :meth:`__and__` and
-:meth:`isdisjoint` ::
-
- class ListBasedSet(collections.Set):
- ''' Alternate set implementation favoring space over speed
- and not requiring the set elements to be hashable. '''
- def __init__(self, iterable):
- self.elements = lst = []
- for value in iterable:
- if value not in lst:
- lst.append(value)
- def __iter__(self):
- return iter(self.elements)
- def __contains__(self, value):
- return value in self.elements
- def __len__(self):
- return len(self.elements)
-
- s1 = ListBasedSet('abcdef')
- s2 = ListBasedSet('defghi')
- overlap = s1 & s2 # The __and__() method is supported automatically
-
-Notes on using :class:`Set` and :class:`MutableSet` as a mixin:
-
-(1)
- Since some set operations create new sets, the default mixin methods need
- a way to create new instances from an iterable. The class constructor is
- assumed to have a signature in the form ``ClassName(iterable)``.
- That assumption is factored-out to an internal classmethod called
- :meth:`_from_iterable` which calls ``cls(iterable)`` to produce a new set.
- If the :class:`Set` mixin is being used in a class with a different
- constructor signature, you will need to override :meth:`_from_iterable`
- with a classmethod that can construct new instances from
- an iterable argument.
-
-(2)
- To override the comparisons (presumably for speed, as the
- semantics are fixed), redefine :meth:`__le__` and
- then the other operations will automatically follow suit.
-
-(3)
- The :class:`Set` mixin provides a :meth:`_hash` method to compute a hash value
- for the set; however, :meth:`__hash__` is not defined because not all sets
- are hashable or immutable. To add set hashabilty using mixins,
- inherit from both :meth:`Set` and :meth:`Hashable`, then define
- ``__hash__ = Set._hash``.
-
-.. seealso::
-
- * `OrderedSet recipe <http://code.activestate.com/recipes/576694/>`_ for an
- example built on :class:`MutableSet`.
-
- * For more about ABCs, see the :mod:`abc` module and :pep:`3119`.
+ Class that simulates a string or a Unicode string object. The instance's
+ content is kept in a regular string object, which is accessible via the
+ :attr:`data` attribute of :class:`UserString` instances. The instance's
+ contents are initially set to a copy of *sequence*. The *sequence* can
+ be an instance of :class:`bytes`, :class:`str`, :class:`UserString` (or a
+ subclass) or an arbitrary sequence which can be converted into a string using
+ the built-in :func:`str` function.
diff --git a/Doc/library/concurrency.rst b/Doc/library/concurrency.rst
new file mode 100644
index 0000000..fd2dae2
--- /dev/null
+++ b/Doc/library/concurrency.rst
@@ -0,0 +1,32 @@
+.. _concurrency:
+
+********************
+Concurrent Execution
+********************
+
+The modules described in this chapter provide support for concurrent
+execution of code. The appropriate choice of tool will depend on the
+task to be executed (CPU bound vs IO bound) and preferred style of
+development (event driven cooperative multitasking vs preemptive
+multitasking). Here's an overview:
+
+
+.. toctree::
+
+ threading.rst
+ multiprocessing.rst
+ concurrent.rst
+ concurrent.futures.rst
+ subprocess.rst
+ sched.rst
+ queue.rst
+ select.rst
+
+
+The following are support modules for some of the above services:
+
+.. toctree::
+
+ dummy_threading.rst
+ _thread.rst
+ _dummy_thread.rst
diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst
index eee2285..7ed99d5 100644
--- a/Doc/library/concurrent.futures.rst
+++ b/Doc/library/concurrent.futures.rst
@@ -136,20 +136,23 @@ ThreadPoolExecutor Example
'http://www.bbc.co.uk/',
'http://some-made-up-domain.com/']
+ # Retrieve a single page and report the url and contents
def load_url(url, timeout):
- return urllib.request.urlopen(url, timeout=timeout).read()
+ conn = urllib.request.urlopen(url, timeout=timeout)
+ return conn.readall()
+ # We can use a with statement to ensure threads are cleaned up promptly
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
- future_to_url = dict((executor.submit(load_url, url, 60), url)
- for url in URLS)
-
+ # Start the load operations and mark each future with its URL
+ future_to_url = {executor.submit(load_url, url, 60):url for url in URLS}
for future in concurrent.futures.as_completed(future_to_url):
url = future_to_url[future]
- if future.exception() is not None:
- print('%r generated an exception: %s' % (url,
- future.exception()))
+ try:
+ data = future.result()
+ except Exception as exc:
+ print('%r generated an exception: %s' % (url, exc))
else:
- print('%r page is %d bytes' % (url, len(future.result())))
+ print('%r page is %d bytes' % (url, len(data)))
ProcessPoolExecutor
@@ -170,6 +173,12 @@ to a :class:`ProcessPoolExecutor` will result in deadlock.
of at most *max_workers* processes. If *max_workers* is ``None`` or not
given, it will default to the number of processors on the machine.
+ .. versionchanged:: 3.3
+ When one of the worker processes terminates abruptly, a
+ :exc:`BrokenProcessPool` error is now raised. Previously, behaviour
+ was undefined but operations on the executor or its futures would often
+ freeze or deadlock.
+
.. _processpoolexecutor-example:
@@ -371,3 +380,16 @@ Module Functions
:pep:`3148` -- futures - execute computations asynchronously
The proposal which described this feature for inclusion in the Python
standard library.
+
+
+Exception classes
+-----------------
+
+.. exception:: BrokenProcessPool
+
+ Derived from :exc:`RuntimeError`, this exception class is raised when
+ one of the workers of a :class:`ProcessPoolExecutor` has terminated
+ in a non-clean fashion (for example, if it was killed from the outside).
+
+ .. versionadded:: 3.3
+
diff --git a/Doc/library/concurrent.rst b/Doc/library/concurrent.rst
new file mode 100644
index 0000000..2eba536
--- /dev/null
+++ b/Doc/library/concurrent.rst
@@ -0,0 +1,6 @@
+The :mod:`concurrent` package
+=============================
+
+Currently, there is only one module in this package:
+
+* :mod:`concurrent.futures` -- Launching parallel tasks
diff --git a/Doc/library/contextlib.rst b/Doc/library/contextlib.rst
index e8dc17f..ed9ebb8 100644
--- a/Doc/library/contextlib.rst
+++ b/Doc/library/contextlib.rst
@@ -12,8 +12,11 @@ This module provides utilities for common tasks involving the :keyword:`with`
statement. For more information see also :ref:`typecontextmanager` and
:ref:`context-managers`.
-Functions provided:
+Utilities
+---------
+
+Functions and classes provided:
.. decorator:: contextmanager
@@ -168,6 +171,348 @@ Functions provided:
.. versionadded:: 3.2
+.. class:: ExitStack()
+
+ A context manager that is designed to make it easy to programmatically
+ combine other context managers and cleanup functions, especially those
+ that are optional or otherwise driven by input data.
+
+ For example, a set of files may easily be handled in a single with
+ statement as follows::
+
+ with ExitStack() as stack:
+ files = [stack.enter_context(open(fname)) for fname in filenames]
+ # All opened files will automatically be closed at the end of
+ # the with statement, even if attempts to open files later
+ # in the list throw an exception
+
+ Each instance maintains a stack of registered callbacks that are called in
+ reverse order when the instance is closed (either explicitly or implicitly
+ at the end of a :keyword:`with` statement). Note that callbacks are *not*
+ invoked implicitly when the context stack instance is garbage collected.
+
+ This stack model is used so that context managers that acquire their
+ resources in their ``__init__`` method (such as file objects) can be
+ handled correctly.
+
+ Since registered callbacks are invoked in the reverse order of
+ registration, this ends up behaving as if multiple nested :keyword:`with`
+ statements had been used with the registered set of callbacks. This even
+ extends to exception handling - if an inner callback suppresses or replaces
+ an exception, then outer callbacks will be passed arguments based on that
+ updated state.
+
+ This is a relatively low level API that takes care of the details of
+ correctly unwinding the stack of exit callbacks. It provides a suitable
+ foundation for higher level context managers that manipulate the exit
+ stack in application specific ways.
+
+ .. versionadded:: 3.3
+
+ .. method:: enter_context(cm)
+
+ Enters a new context manager and adds its :meth:`__exit__` method to
+ the callback stack. The return value is the result of the context
+ manager's own :meth:`__enter__` method.
+
+ These context managers may suppress exceptions just as they normally
+ would if used directly as part of a :keyword:`with` statement.
+
+ .. method:: push(exit)
+
+ Adds a context manager's :meth:`__exit__` method to the callback stack.
+
+ As ``__enter__`` is *not* invoked, this method can be used to cover
+ part of an :meth:`__enter__` implementation with a context manager's own
+ :meth:`__exit__` method.
+
+ If passed an object that is not a context manager, this method assumes
+ it is a callback with the same signature as a context manager's
+ :meth:`__exit__` method and adds it directly to the callback stack.
+
+ By returning true values, these callbacks can suppress exceptions the
+ same way context manager :meth:`__exit__` methods can.
+
+ The passed in object is returned from the function, allowing this
+ method to be used as a function decorator.
+
+ .. method:: callback(callback, *args, **kwds)
+
+ Accepts an arbitrary callback function and arguments and adds it to
+ the callback stack.
+
+ Unlike the other methods, callbacks added this way cannot suppress
+ exceptions (as they are never passed the exception details).
+
+ The passed in callback is returned from the function, allowing this
+ method to be used as a function decorator.
+
+ .. method:: pop_all()
+
+ Transfers the callback stack to a fresh :class:`ExitStack` instance
+ and returns it. No callbacks are invoked by this operation - instead,
+ they will now be invoked when the new stack is closed (either
+ explicitly or implicitly at the end of a :keyword:`with` statement).
+
+ For example, a group of files can be opened as an "all or nothing"
+ operation as follows::
+
+ with ExitStack() as stack:
+ files = [stack.enter_context(open(fname)) for fname in filenames]
+ close_files = stack.pop_all().close
+ # If opening any file fails, all previously opened files will be
+ # closed automatically. If all files are opened successfully,
+ # they will remain open even after the with statement ends.
+ # close_files() can then be invoked explicitly to close them all
+
+ .. method:: close()
+
+ Immediately unwinds the callback stack, invoking callbacks in the
+ reverse order of registration. For any context managers and exit
+ callbacks registered, the arguments passed in will indicate that no
+ exception occurred.
+
+
+Examples and Recipes
+--------------------
+
+This section describes some examples and recipes for making effective use of
+the tools provided by :mod:`contextlib`.
+
+
+Supporting a variable number of context managers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The primary use case for :class:`ExitStack` is the one given in the class
+documentation: supporting a variable number of context managers and other
+cleanup operations in a single :keyword:`with` statement. The variability
+may come from the number of context managers needed being driven by user
+input (such as opening a user specified collection of files), or from
+some of the context managers being optional::
+
+ with ExitStack() as stack:
+ for resource in resources:
+ stack.enter_context(resource)
+ if need_special_resource:
+ special = acquire_special_resource()
+ stack.callback(release_special_resource, special)
+ # Perform operations that use the acquired resources
+
+As shown, :class:`ExitStack` also makes it quite easy to use :keyword:`with`
+statements to manage arbitrary resources that don't natively support the
+context management protocol.
+
+
+Simplifying support for single optional context managers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In the specific case of a single optional context manager, :class:`ExitStack`
+instances can be used as a "do nothing" context manager, allowing a context
+manager to easily be omitted without affecting the overall structure of
+the source code::
+
+ def debug_trace(details):
+ if __debug__:
+ return TraceContext(details)
+ # Don't do anything special with the context in release mode
+ return ExitStack()
+
+ with debug_trace():
+ # Suite is traced in debug mode, but runs normally otherwise
+
+
+Catching exceptions from ``__enter__`` methods
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+It is occasionally desirable to catch exceptions from an ``__enter__``
+method implementation, *without* inadvertently catching exceptions from
+the :keyword:`with` statement body or the context manager's ``__exit__``
+method. By using :class:`ExitStack` the steps in the context management
+protocol can be separated slightly in order to allow this::
+
+ stack = ExitStack()
+ try:
+ x = stack.enter_context(cm)
+ except Exception:
+ # handle __enter__ exception
+ else:
+ with stack:
+ # Handle normal case
+
+Actually needing to do this is likely to indicate that the underlying API
+should be providing a direct resource management interface for use with
+:keyword:`try`/:keyword:`except`/:keyword:`finally` statements, but not
+all APIs are well designed in that regard. When a context manager is the
+only resource management API provided, then :class:`ExitStack` can make it
+easier to handle various situations that can't be handled directly in a
+:keyword:`with` statement.
+
+
+Cleaning up in an ``__enter__`` implementation
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+As noted in the documentation of :meth:`ExitStack.push`, this
+method can be useful in cleaning up an already allocated resource if later
+steps in the :meth:`__enter__` implementation fail.
+
+Here's an example of doing this for a context manager that accepts resource
+acquisition and release functions, along with an optional validation function,
+and maps them to the context management protocol::
+
+ from contextlib import contextmanager, ExitStack
+
+ class ResourceManager(object):
+
+ def __init__(self, acquire_resource, release_resource, check_resource_ok=None):
+ self.acquire_resource = acquire_resource
+ self.release_resource = release_resource
+ if check_resource_ok is None:
+ def check_resource_ok(resource):
+ return True
+ self.check_resource_ok = check_resource_ok
+
+ @contextmanager
+ def _cleanup_on_error(self):
+ with ExitStack() as stack:
+ stack.push(self)
+ yield
+ # The validation check passed and didn't raise an exception
+ # Accordingly, we want to keep the resource, and pass it
+ # back to our caller
+ stack.pop_all()
+
+ def __enter__(self):
+ resource = self.acquire_resource()
+ with self._cleanup_on_error():
+ if not self.check_resource_ok(resource):
+ msg = "Failed validation for {!r}"
+ raise RuntimeError(msg.format(resource))
+ return resource
+
+ def __exit__(self, *exc_details):
+ # We don't need to duplicate any of our resource release logic
+ self.release_resource()
+
+
+Replacing any use of ``try-finally`` and flag variables
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A pattern you will sometimes see is a ``try-finally`` statement with a flag
+variable to indicate whether or not the body of the ``finally`` clause should
+be executed. In its simplest form (that can't already be handled just by
+using an ``except`` clause instead), it looks something like this::
+
+ cleanup_needed = True
+ try:
+ result = perform_operation()
+ if result:
+ cleanup_needed = False
+ finally:
+ if cleanup_needed:
+ cleanup_resources()
+
+As with any ``try`` statement based code, this can cause problems for
+development and review, because the setup code and the cleanup code can end
+up being separated by arbitrarily long sections of code.
+
+:class:`ExitStack` makes it possible to instead register a callback for
+execution at the end of a ``with`` statement, and then later decide to skip
+executing that callback::
+
+ from contextlib import ExitStack
+
+ with ExitStack() as stack:
+ stack.callback(cleanup_resources)
+ result = perform_operation()
+ if result:
+ stack.pop_all()
+
+This allows the intended cleanup behaviour to be made explicit up front,
+rather than requiring a separate flag variable.
+
+If a particular application uses this pattern a lot, it can be simplified
+even further by means of a small helper class::
+
+ from contextlib import ExitStack
+
+ class Callback(ExitStack):
+ def __init__(self, callback, *args, **kwds):
+ super(Callback, self).__init__()
+ self.callback(callback, *args, **kwds)
+
+ def cancel(self):
+ self.pop_all()
+
+ with Callback(cleanup_resources) as cb:
+ result = perform_operation()
+ if result:
+ cb.cancel()
+
+If the resource cleanup isn't already neatly bundled into a standalone
+function, then it is still possible to use the decorator form of
+:meth:`ExitStack.callback` to declare the resource cleanup in
+advance::
+
+ from contextlib import ExitStack
+
+ with ExitStack() as stack:
+ @stack.callback
+ def cleanup_resources():
+ ...
+ result = perform_operation()
+ if result:
+ stack.pop_all()
+
+Due to the way the decorator protocol works, a callback function
+declared this way cannot take any parameters. Instead, any resources to
+be released must be accessed as closure variables.
+
+
+Using a context manager as a function decorator
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:class:`ContextDecorator` makes it possible to use a context manager in
+both an ordinary ``with`` statement and also as a function decorator.
+
+For example, it is sometimes useful to wrap functions or groups of statements
+with a logger that can track the time of entry and time of exit. Rather than
+writing both a function decorator and a context manager for the task,
+inheriting from :class:`ContextDecorator` provides both capabilities in a
+single definition::
+
+ from contextlib import ContextDecorator
+ import logging
+
+ logging.basicConfig(level=logging.INFO)
+
+ class track_entry_and_exit(ContextDecorator):
+ def __init__(self, name):
+ self.name = name
+
+ def __enter__(self):
+ logging.info('Entering: {}'.format(self.name))
+
+ def __exit__(self, exc_type, exc, exc_tb):
+ logging.info('Exiting: {}'.format(self.name))
+
+Instances of this class can be used as both a context manager::
+
+ with track_entry_and_exit('widget loader'):
+ print('Some time consuming activity goes here')
+ load_widget()
+
+And also as a function decorator::
+
+ @track_entry_and_exit('widget loader')
+ def activity():
+ print('Some time consuming activity goes here')
+ load_widget()
+
+Note that there is one additional limitation when using context managers
+as function decorators: there's no way to access the return value of
+:meth:`__enter__`. If that value is needed, then it is still necessary to use
+an explicit ``with`` statement.
+
.. seealso::
:pep:`0343` - The "with" statement
diff --git a/Doc/library/copyreg.rst b/Doc/library/copyreg.rst
index a2d316e..41061e5 100644
--- a/Doc/library/copyreg.rst
+++ b/Doc/library/copyreg.rst
@@ -32,6 +32,8 @@ Such constructors may be factory functions or class instances.
returned by *function* at pickling time. :exc:`TypeError` will be raised if
*object* is a class or *constructor* is not callable.
- See the :mod:`pickle` module for more details on the interface expected of
- *function* and *constructor*.
-
+ See the :mod:`pickle` module for more details on the interface
+ expected of *function* and *constructor*. Note that the
+ :attr:`~pickle.Pickler.dispatch_table` attribute of a pickler
+ object or subclass of :class:`pickle.Pickler` can also be used for
+ declaring reduction functions.
diff --git a/Doc/library/crypt.rst b/Doc/library/crypt.rst
index 0be571e..b4c90cd 100644
--- a/Doc/library/crypt.rst
+++ b/Doc/library/crypt.rst
@@ -15,9 +15,9 @@
This module implements an interface to the :manpage:`crypt(3)` routine, which is
a one-way hash function based upon a modified DES algorithm; see the Unix man
-page for further details. Possible uses include allowing Python scripts to
-accept typed passwords from the user, or attempting to crack Unix passwords with
-a dictionary.
+page for further details. Possible uses include storing hashed passwords
+so you can check passwords without storing the actual password, or attempting
+to crack Unix passwords with a dictionary.
.. index:: single: crypt(3)
@@ -26,15 +26,74 @@ the :manpage:`crypt(3)` routine in the running system. Therefore, any
extensions available on the current implementation will also be available on
this module.
+Hashing Methods
+---------------
-.. function:: crypt(word, salt)
+.. versionadded:: 3.3
+
+The :mod:`crypt` module defines the list of hashing methods (not all methods
+are available on all platforms):
+
+.. data:: METHOD_SHA512
+
+ A Modular Crypt Format method with 16 character salt and 86 character
+ hash. This is the strongest method.
+
+.. data:: METHOD_SHA256
+
+ Another Modular Crypt Format method with 16 character salt and 43
+ character hash.
+
+.. data:: METHOD_MD5
+
+ Another Modular Crypt Format method with 8 character salt and 22
+ character hash.
+
+.. data:: METHOD_CRYPT
+
+ The traditional method with a 2 character salt and 13 characters of
+ hash. This is the weakest method.
+
+
+Module Attributes
+-----------------
+
+.. versionadded:: 3.3
+
+.. attribute:: methods
+
+ A list of available password hashing algorithms, as
+ ``crypt.METHOD_*`` objects. This list is sorted from strongest to
+ weakest, and is guaranteed to have at least ``crypt.METHOD_CRYPT``.
+
+
+Module Functions
+----------------
+
+The :mod:`crypt` module defines the following functions:
+
+.. function:: crypt(word, salt=None)
*word* will usually be a user's password as typed at a prompt or in a graphical
- interface. *salt* is usually a random two-character string which will be used
- to perturb the DES algorithm in one of 4096 ways. The characters in *salt* must
- be in the set ``[./a-zA-Z0-9]``. Returns the hashed password as a string, which
- will be composed of characters from the same alphabet as the salt (the first two
- characters represent the salt itself).
+ interface. The optional *salt* is either a string as returned from
+ :func:`mksalt`, one of the ``crypt.METHOD_*`` values (though not all
+ may be available on all platforms), or a full encrypted password
+ including salt, as returned by this function. If *salt* is not
+ provided, the strongest method will be used (as returned by
+ :func:`methods`).
+
+ Checking a password is usually done by passing the plain-text password
+ as *word* and the full results of a previous :func:`crypt` call,
+ which should be the same as the results of this call.
+
+ *salt* (either a random 2 or 16 character string, possibly prefixed with
+ ``$digit$`` to indicate the method) will be used to perturb the
+ encryption algorithm. The characters in *salt* must be in the set
+ ``[./a-zA-Z0-9]``, with the exception of Modular Crypt Format which
+ prefixes a ``$digit$``.
+
+ Returns the hashed password as a string, which will be composed of
+ characters from the same alphabet as the salt.
.. index:: single: crypt(3)
@@ -42,18 +101,52 @@ this module.
different sizes in the *salt*, it is recommended to use the full crypted
password as salt when checking for a password.
-A simple example illustrating typical use::
+ .. versionchanged:: 3.3
+ Accept ``crypt.METHOD_*`` values in addition to strings for *salt*.
+
+
+.. function:: mksalt(method=None)
- import crypt, getpass, pwd
+ Return a randomly generated salt of the specified method. If no
+ *method* is given, the strongest method available as returned by
+ :func:`methods` is used.
+
+ The return value is a string either of 2 characters in length for
+ ``crypt.METHOD_CRYPT``, or 19 characters starting with ``$digit$`` and
+ 16 random characters from the set ``[./a-zA-Z0-9]``, suitable for
+ passing as the *salt* argument to :func:`crypt`.
+
+ .. versionadded:: 3.3
+
+Examples
+--------
+
+A simple example illustrating typical use (a constant-time comparison
+operation is needed to limit exposure to timing attacks.
+:func:`hmac.compare_digest` is suitable for this purpose)::
+
+ import pwd
+ import crypt
+ import getpass
+ from hmac import compare_digest as compare_hash
def login():
- username = input('Python login:')
+ username = input('Python login: ')
cryptedpasswd = pwd.getpwnam(username)[1]
if cryptedpasswd:
if cryptedpasswd == 'x' or cryptedpasswd == '*':
- raise "Sorry, currently no support for shadow passwords"
+ raise ValueError('no support for shadow passwords')
cleartext = getpass.getpass()
- return crypt.crypt(cleartext, cryptedpasswd) == cryptedpasswd
+ return compare_hash(crypt.crypt(cleartext, cryptedpasswd), cryptedpasswd)
else:
- return 1
+ return True
+
+To generate a hash of a password using the strongest available method and
+check it against the original::
+
+ import crypt
+ from hmac import compare_digest as compare_hash
+ hashed = crypt.crypt(plaintext)
+ if not compare_hash(hashed, crypt.crypt(plaintext, hashed)):
+ raise ValueError("hashed version doesn't validate against original")
diff --git a/Doc/library/crypto.rst b/Doc/library/crypto.rst
index a233561..469ede49 100644
--- a/Doc/library/crypto.rst
+++ b/Doc/library/crypto.rst
@@ -8,6 +8,7 @@ Cryptographic Services
The modules described in this chapter implement various algorithms of a
cryptographic nature. They are available at the discretion of the installation.
+On Unix systems, the :mod:`crypt` module may also be available.
Here's an overview:
diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst
index d8df7fc..4f19cee 100644
--- a/Doc/library/csv.rst
+++ b/Doc/library/csv.rst
@@ -11,15 +11,15 @@
pair: data; tabular
The so-called CSV (Comma Separated Values) format is the most common import and
-export format for spreadsheets and databases. There is no "CSV standard", so
-the format is operationally defined by the many applications which read and
-write it. The lack of a standard means that subtle differences often exist in
-the data produced and consumed by different applications. These differences can
-make it annoying to process CSV files from multiple sources. Still, while the
-delimiters and quoting characters vary, the overall format is similar enough
-that it is possible to write a single module which can efficiently manipulate
-such data, hiding the details of reading and writing the data from the
-programmer.
+export format for spreadsheets and databases. CSV format was used for many
+years prior to attempts to describe the format in a standardized way in
+:rfc:`4180`. The lack of a well-defined standard means that subtle differences
+often exist in the data produced and consumed by different applications. These
+differences can make it annoying to process CSV files from multiple sources.
+Still, while the delimiters and quoting characters vary, the overall format is
+similar enough that it is possible to write a single module which can
+efficiently manipulate such data, hiding the details of reading and writing the
+data from the programmer.
The :mod:`csv` module implements classes to read and write tabular data in CSV
format. It allows programmers to say, "write this data in the format preferred
diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst
index 3c602c8..3e4afd1 100644
--- a/Doc/library/ctypes.rst
+++ b/Doc/library/ctypes.rst
@@ -39,9 +39,14 @@ loads libraries which export functions using the standard ``cdecl`` calling
convention, while *windll* libraries call functions using the ``stdcall``
calling convention. *oledll* also uses the ``stdcall`` calling convention, and
assumes the functions return a Windows :c:type:`HRESULT` error code. The error
-code is used to automatically raise a :class:`WindowsError` exception when the
+code is used to automatically raise an :class:`OSError` exception when the
function call fails.
+.. versionchanged:: 3.3
+ Windows errors used to raise :exc:`WindowsError`, which is now an alias
+ of :exc:`OSError`.
+
+
Here are some examples for Windows. Note that ``msvcrt`` is the MS standard C
library containing most standard C functions, and uses the cdecl calling
convention::
@@ -189,7 +194,7 @@ argument values::
>>> windll.kernel32.GetModuleHandleA(32) # doctest: +WINDOWS
Traceback (most recent call last):
File "<stdin>", line 1, in ?
- WindowsError: exception: access violation reading 0x00000020
+ OSError: exception: access violation reading 0x00000020
>>>
There are, however, enough ways to crash Python with :mod:`ctypes`, so you
@@ -496,7 +501,7 @@ useful to check for error return values and automatically raise an exception::
Traceback (most recent call last):
File "<stdin>", line 1, in ?
File "<stdin>", line 3, in ValidHandle
- WindowsError: [Errno 126] The specified module could not be found.
+ OSError: [Errno 126] The specified module could not be found.
>>>
``WinError`` is a function which will call Windows ``FormatMessage()`` api to
@@ -926,21 +931,21 @@ Callback functions
:mod:`ctypes` allows to create C callable function pointers from Python callables.
These are sometimes called *callback functions*.
-First, you must create a class for the callback function, the class knows the
+First, you must create a class for the callback function. The class knows the
calling convention, the return type, and the number and types of arguments this
function will receive.
-The CFUNCTYPE factory function creates types for callback functions using the
-normal cdecl calling convention, and, on Windows, the WINFUNCTYPE factory
-function creates types for callback functions using the stdcall calling
-convention.
+The :func:`CFUNCTYPE` factory function creates types for callback functions
+using the ``cdecl`` calling convention. On Windows, the :func:`WINFUNCTYPE`
+factory function creates types for callback functions using the ``stdcall``
+calling convention.
Both of these factory functions are called with the result type as first
argument, and the callback functions expected argument types as the remaining
arguments.
I will present an example here which uses the standard C library's
-:c:func:`qsort` function, this is used to sort items with the help of a callback
+:c:func:`qsort` function, which is used to sort items with the help of a callback
function. :c:func:`qsort` will be used to sort an array of integers::
>>> IntArray5 = c_int * 5
@@ -953,7 +958,7 @@ function. :c:func:`qsort` will be used to sort an array of integers::
items in the data array, the size of one item, and a pointer to the comparison
function, the callback. The callback will then be called with two pointers to
items, and it must return a negative integer if the first item is smaller than
-the second, a zero if they are equal, and a positive integer else.
+the second, a zero if they are equal, and a positive integer otherwise.
So our callback function receives pointers to integers, and must return an
integer. First we create the ``type`` for the callback function::
@@ -961,36 +966,8 @@ integer. First we create the ``type`` for the callback function::
>>> CMPFUNC = CFUNCTYPE(c_int, POINTER(c_int), POINTER(c_int))
>>>
-For the first implementation of the callback function, we simply print the
-arguments we get, and return 0 (incremental development ;-)::
-
- >>> def py_cmp_func(a, b):
- ... print("py_cmp_func", a, b)
- ... return 0
- ...
- >>>
-
-Create the C callable callback::
-
- >>> cmp_func = CMPFUNC(py_cmp_func)
- >>>
-
-And we're ready to go::
-
- >>> qsort(ia, len(ia), sizeof(c_int), cmp_func) # doctest: +WINDOWS
- py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...>
- py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...>
- py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...>
- py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...>
- py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...>
- py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...>
- py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...>
- py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...>
- py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...>
- py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...>
- >>>
-
-We know how to access the contents of a pointer, so lets redefine our callback::
+To get started, here is a simple callback that shows the values it gets
+passed::
>>> def py_cmp_func(a, b):
... print("py_cmp_func", a[0], b[0])
@@ -999,23 +976,7 @@ We know how to access the contents of a pointer, so lets redefine our callback::
>>> cmp_func = CMPFUNC(py_cmp_func)
>>>
-Here is what we get on Windows::
-
- >>> qsort(ia, len(ia), sizeof(c_int), cmp_func) # doctest: +WINDOWS
- py_cmp_func 7 1
- py_cmp_func 33 1
- py_cmp_func 99 1
- py_cmp_func 5 1
- py_cmp_func 7 5
- py_cmp_func 33 5
- py_cmp_func 99 5
- py_cmp_func 7 99
- py_cmp_func 33 99
- py_cmp_func 7 33
- >>>
-
-It is funny to see that on linux the sort function seems to work much more
-efficiently, it is doing less comparisons::
+The result::
>>> qsort(ia, len(ia), sizeof(c_int), cmp_func) # doctest: +LINUX
py_cmp_func 5 1
@@ -1025,32 +986,13 @@ efficiently, it is doing less comparisons::
py_cmp_func 1 7
>>>
-Ah, we're nearly done! The last step is to actually compare the two items and
-return a useful result::
+Now we can actually compare the two items and return a useful result::
>>> def py_cmp_func(a, b):
... print("py_cmp_func", a[0], b[0])
... return a[0] - b[0]
...
>>>
-
-Final run on Windows::
-
- >>> qsort(ia, len(ia), sizeof(c_int), CMPFUNC(py_cmp_func)) # doctest: +WINDOWS
- py_cmp_func 33 7
- py_cmp_func 99 33
- py_cmp_func 5 99
- py_cmp_func 1 99
- py_cmp_func 33 7
- py_cmp_func 1 33
- py_cmp_func 5 33
- py_cmp_func 5 7
- py_cmp_func 1 7
- py_cmp_func 5 1
- >>>
-
-and on Linux::
-
>>> qsort(ia, len(ia), sizeof(c_int), CMPFUNC(py_cmp_func)) # doctest: +LINUX
py_cmp_func 5 1
py_cmp_func 33 99
@@ -1059,9 +1001,6 @@ and on Linux::
py_cmp_func 5 7
>>>
-It is quite interesting to see that the Windows :func:`qsort` function needs
-more comparisons than the linux version!
-
As we can easily check, our array is sorted now::
>>> for i in ia: print(i, end=" ")
@@ -1071,9 +1010,9 @@ As we can easily check, our array is sorted now::
**Important note for callback functions:**
-Make sure you keep references to CFUNCTYPE objects as long as they are used from
-C code. :mod:`ctypes` doesn't, and if you don't, they may be garbage collected,
-crashing your program when a callback is made.
+Make sure you keep references to :func:`CFUNCTYPE` objects as long as they are
+used from C code. :mod:`ctypes` doesn't, and if you don't, they may be garbage
+collected, crashing your program when a callback is made.
.. _ctypes-accessing-values-exported-from-dlls:
@@ -1350,7 +1289,10 @@ way is to instantiate one of the following classes:
assumed to return the windows specific :class:`HRESULT` code. :class:`HRESULT`
values contain information specifying whether the function call failed or
succeeded, together with additional error code. If the return value signals a
- failure, an :class:`WindowsError` is automatically raised.
+ failure, an :class:`OSError` is automatically raised.
+
+ .. versionchanged:: 3.3
+ :exc:`WindowsError` used to be raised.
.. class:: WinDLL(name, mode=DEFAULT_MODE, handle=None, use_errno=False, use_last_error=False)
@@ -1967,11 +1909,14 @@ Utility functions
.. function:: WinError(code=None, descr=None)
Windows only: this function is probably the worst-named thing in ctypes. It
- creates an instance of WindowsError. If *code* is not specified,
+ creates an instance of OSError. If *code* is not specified,
``GetLastError`` is called to determine the error code. If *descr* is not
specified, :func:`FormatError` is called to get a textual description of the
error.
+ .. versionchanged:: 3.3
+ An instance of :exc:`WindowsError` used to be created.
+
.. function:: wstring_at(address, size=-1)
diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst
index 11ab5d0..9e5cb55 100644
--- a/Doc/library/curses.rst
+++ b/Doc/library/curses.rst
@@ -599,6 +599,17 @@ The module :mod:`curses` defines the following functions:
Only one *ch* can be pushed before :meth:`getch` is called.
+.. function:: unget_wch(ch)
+
+ Push *ch* so the next :meth:`get_wch` will return it.
+
+ .. note::
+
+ Only one *ch* can be pushed before :meth:`get_wch` is called.
+
+ .. versionadded:: 3.3
+
+
.. function:: ungetmouse(id, x, y, z, bstate)
Push a :const:`KEY_MOUSE` event onto the input queue, associating the given
@@ -643,7 +654,7 @@ Window Objects
--------------
Window objects, as returned by :func:`initscr` and :func:`newwin` above, have
-the following methods:
+the following methods and attributes:
.. method:: window.addch(ch[, attr])
@@ -831,6 +842,16 @@ the following methods:
event.
+.. attribute:: window.encoding
+
+ Encoding used to encode method arguments (Unicode strings and characters).
+ The encoding attribute is inherited from the parent window when a subwindow
+ is created, for example with :meth:`window.subwin`. By default, the locale
+ encoding is used (see :func:`locale.getpreferredencoding`).
+
+ .. versionadded:: 3.3
+
+
.. method:: window.erase()
Clear the window.
@@ -854,11 +875,20 @@ the following methods:
until a key is pressed.
+.. method:: window.get_wch([y, x])
+
+ Get a wide character. Return a character for most keys, or an integer for
+ function keys, keypad keys, and other special keys.
+
+ .. versionadded:: 3.3
+
+
.. method:: window.getkey([y, x])
Get a character, returning a string instead of an integer, as :meth:`getch`
- does. Function keys, keypad keys and so on return a multibyte string containing
- the key name. In no-delay mode, an exception is raised if there is no input.
+ does. Function keys, keypad keys and other special keys return a multibyte
+ string containing the key name. In no-delay mode, an exception is raised if
+ there is no input.
.. method:: window.getmaxyx()
diff --git a/Doc/library/datatypes.rst b/Doc/library/datatypes.rst
index 6b4a71a..d0382e0 100644
--- a/Doc/library/datatypes.rst
+++ b/Doc/library/datatypes.rst
@@ -21,11 +21,10 @@ The following modules are documented in this chapter:
datetime.rst
calendar.rst
collections.rst
+ collections.abc.rst
heapq.rst
bisect.rst
array.rst
- sched.rst
- queue.rst
weakref.rst
types.rst
copy.rst
diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst
index 88fa01c..7c0aac3 100644
--- a/Doc/library/datetime.rst
+++ b/Doc/library/datetime.rst
@@ -404,12 +404,19 @@ Other constructors, all class methods:
.. classmethod:: date.fromtimestamp(timestamp)
Return the local date corresponding to the POSIX timestamp, such as is returned
- by :func:`time.time`. This may raise :exc:`ValueError`, if the timestamp is out
- of the range of values supported by the platform C :c:func:`localtime` function.
+ by :func:`time.time`. This may raise :exc:`OverflowError`, if the timestamp is out
+ of the range of values supported by the platform C :c:func:`localtime` function,
+ and :exc:`OSError` on :c:func:`localtime` failure.
It's common for this to be restricted to years from 1970 through 2038. Note
that on non-POSIX systems that include leap seconds in their notion of a
timestamp, leap seconds are ignored by :meth:`fromtimestamp`.
+ .. versionchanged:: 3.3
+ Raise :exc:`OverflowError` instead of :exc:`ValueError` if the timestamp
+ is out of the range of values supported by the platform C
+ :c:func:`localtime` function. Raise :exc:`OSError` instead of
+ :exc:`ValueError` on :c:func:`localtime` failure.
+
.. classmethod:: date.fromordinal(ordinal)
@@ -713,23 +720,44 @@ Other constructors, all class methods:
equivalent to
``tz.fromutc(datetime.utcfromtimestamp(timestamp).replace(tzinfo=tz))``.
- :meth:`fromtimestamp` may raise :exc:`ValueError`, if the timestamp is out of
+ :meth:`fromtimestamp` may raise :exc:`OverflowError`, if the timestamp is out of
the range of values supported by the platform C :c:func:`localtime` or
- :c:func:`gmtime` functions. It's common for this to be restricted to years in
+ :c:func:`gmtime` functions, and :exc:`OSError` on :c:func:`localtime` or
+ :c:func:`gmtime` failure.
+ It's common for this to be restricted to years in
1970 through 2038. Note that on non-POSIX systems that include leap seconds in
their notion of a timestamp, leap seconds are ignored by :meth:`fromtimestamp`,
and then it's possible to have two timestamps differing by a second that yield
identical :class:`.datetime` objects. See also :meth:`utcfromtimestamp`.
+ .. versionchanged:: 3.3
+ Raise :exc:`OverflowError` instead of :exc:`ValueError` if the timestamp
+ is out of the range of values supported by the platform C
+ :c:func:`localtime` or :c:func:`gmtime` functions. Raise :exc:`OSError`
+ instead of :exc:`ValueError` on :c:func:`localtime` or :c:func:`gmtime`
+ failure.
+
.. classmethod:: datetime.utcfromtimestamp(timestamp)
Return the UTC :class:`.datetime` corresponding to the POSIX timestamp, with
- :attr:`tzinfo` ``None``. This may raise :exc:`ValueError`, if the timestamp is
- out of the range of values supported by the platform C :c:func:`gmtime` function.
+ :attr:`tzinfo` ``None``. This may raise :exc:`OverflowError`, if the timestamp is
+ out of the range of values supported by the platform C :c:func:`gmtime` function,
+ and :exc:`OSError` on :c:func:`gmtime` failure.
It's common for this to be restricted to years in 1970 through 2038. See also
:meth:`fromtimestamp`.
+ On POSIX-compliant platforms, ``utcfromtimestamp(timestamp)``
+ is equivalent to the following expression::
+
+ datetime(1970, 1, 1) + timedelta(seconds=timestamp)
+
+ .. versionchanged:: 3.3
+ Raise :exc:`OverflowError` instead of :exc:`ValueError` if the timestamp
+ is out of the range of values supported by the platform C
+ :c:func:`gmtime` function. Raise :exc:`OSError` instead of
+ :exc:`ValueError` on :c:func:`gmtime` failure.
+
.. classmethod:: datetime.fromordinal(ordinal)
@@ -873,13 +901,20 @@ Supported operations:
*datetime1* is considered less than *datetime2* when *datetime1* precedes
*datetime2* in time.
- If one comparand is naive and the other is aware, :exc:`TypeError` is raised.
+ If one comparand is naive and the other is aware, :exc:`TypeError`
+ is raised if an order comparison is attempted. For equality
+ comparisons, naive instances are never equal to aware instances.
+
If both comparands are aware, and have the same :attr:`tzinfo` attribute, the
common :attr:`tzinfo` attribute is ignored and the base datetimes are
compared. If both comparands are aware and have different :attr:`tzinfo`
attributes, the comparands are first adjusted by subtracting their UTC
offsets (obtained from ``self.utcoffset()``).
+ .. versionchanged:: 3.3
+ Equality comparisons between naive and aware :class:`datetime`
+ instances don't raise :exc:`TypeError`.
+
.. note::
In order to stop comparison from falling back to the default scheme of comparing
@@ -922,17 +957,22 @@ Instance methods:
datetime with no conversion of date and time data.
-.. method:: datetime.astimezone(tz)
+.. method:: datetime.astimezone(tz=None)
- Return a :class:`.datetime` object with new :attr:`tzinfo` attribute *tz*,
+ Return a :class:`datetime` object with new :attr:`tzinfo` attribute *tz*,
adjusting the date and time data so the result is the same UTC time as
*self*, but in *tz*'s local time.
- *tz* must be an instance of a :class:`tzinfo` subclass, and its
+ If provided, *tz* must be an instance of a :class:`tzinfo` subclass, and its
:meth:`utcoffset` and :meth:`dst` methods must not return ``None``. *self* must
be aware (``self.tzinfo`` must not be ``None``, and ``self.utcoffset()`` must
not return ``None``).
+ If called without arguments (or with ``tz=None``) the system local
+ timezone is assumed. The ``tzinfo`` attribute of the converted
+ datetime instance will be set to an instance of :class:`timezone`
+ with the zone name and offset obtained from the OS.
+
If ``self.tzinfo`` is *tz*, ``self.astimezone(tz)`` is equal to *self*: no
adjustment of date or time data is performed. Else the result is local
time in time zone *tz*, representing the same UTC time as *self*: after
@@ -959,6 +999,9 @@ Instance methods:
# Convert from UTC to tz's local time.
return tz.fromutc(utc)
+ .. versionchanged:: 3.3
+ *tz* can now be omitted.
+
.. method:: datetime.utcoffset()
@@ -1015,6 +1058,39 @@ Instance methods:
Return the proleptic Gregorian ordinal of the date. The same as
``self.date().toordinal()``.
+.. method:: datetime.timestamp()
+
+ Return POSIX timestamp corresponding to the :class:`datetime`
+ instance. The return value is a :class:`float` similar to that
+ returned by :func:`time.time`.
+
+ Naive :class:`datetime` instances are assumed to represent local
+ time and this method relies on the platform C :c:func:`mktime`
+ function to perform the conversion. Since :class:`datetime`
+ supports a wider range of values than :c:func:`mktime` on many
+ platforms, this method may raise :exc:`OverflowError` for times far
+ in the past or far in the future.
+
+ For aware :class:`datetime` instances, the return value is computed
+ as::
+
+ (dt - datetime(1970, 1, 1, tzinfo=timezone.utc)).total_seconds()
+
+ .. versionadded:: 3.3
+
+ .. note::
+
+ There is no method to obtain the POSIX timestamp directly from a
+ naive :class:`datetime` instance representing UTC time. If your
+ application uses this convention and your system timezone is not
+ set to UTC, you can obtain the POSIX timestamp by supplying
+ ``tzinfo=timezone.utc``::
+
+ timestamp = dt.replace(tzinfo=timezone.utc).timestamp()
+
+ or by calculating the timestamp directly::
+
+ timestamp = (dt - datetime(1970, 1, 1)) / timedelta(seconds=1)
.. method:: datetime.weekday()
@@ -1254,7 +1330,10 @@ Supported operations:
* comparison of :class:`.time` to :class:`.time`, where *a* is considered less
than *b* when *a* precedes *b* in time. If one comparand is naive and the other
- is aware, :exc:`TypeError` is raised. If both comparands are aware, and have
+ is aware, :exc:`TypeError` is raised if an order comparison is attempted. For equality
+ comparisons, naive instances are never equal to aware instances.
+
+ If both comparands are aware, and have
the same :attr:`tzinfo` attribute, the common :attr:`tzinfo` attribute is
ignored and the base times are compared. If both comparands are aware and
have different :attr:`tzinfo` attributes, the comparands are first adjusted by
@@ -1264,6 +1343,10 @@ Supported operations:
different type, :exc:`TypeError` is raised unless the comparison is ``==`` or
``!=``. The latter cases return :const:`False` or :const:`True`, respectively.
+ .. versionchanged:: 3.3
+ Equality comparisons between naive and aware :class:`time` instances
+ don't raise :exc:`TypeError`.
+
* hash, use as dict key
* efficient pickling
@@ -1587,11 +1670,12 @@ only EST (fixed offset -5 hours), or only EDT (fixed offset -4 hours)).
:class:`timezone` Objects
--------------------------
-A :class:`timezone` object represents a timezone that is defined by a
-fixed offset from UTC. Note that objects of this class cannot be used
-to represent timezone information in the locations where different
-offsets are used in different days of the year or where historical
-changes have been made to civil time.
+The :class:`timezone` class is a subclass of :class:`tzinfo`, each
+instance of which represents a timezone defined by a fixed offset from
+UTC. Note that objects of this class cannot be used to represent
+timezone information in the locations where different offsets are used
+in different days of the year or where historical changes have been
+made to civil time.
.. class:: timezone(offset[, name])
@@ -1760,8 +1844,7 @@ format codes.
| | decimal number [00,99]. | |
+-----------+--------------------------------+-------+
| ``%Y`` | Year with century as a decimal | \(5) |
-| | number [0001,9999] (strptime), | |
-| | [1000,9999] (strftime). | |
+| | number [0001,9999]. | |
+-----------+--------------------------------+-------+
| ``%z`` | UTC offset in the form +HHMM | \(6) |
| | or -HHMM (empty string if the | |
@@ -1795,10 +1878,7 @@ Notes:
calculations when the day of the week and the year are specified.
(5)
- For technical reasons, :meth:`strftime` method does not support
- dates before year 1000: ``t.strftime(format)`` will raise a
- :exc:`ValueError` when ``t.year < 1000`` even if ``format`` does
- not contain ``%Y`` directive. The :meth:`strptime` method can
+ The :meth:`strptime` method can
parse years in the full [1, 9999] range, but years < 1000 must be
zero-filled to 4-digit width.
@@ -1806,6 +1886,10 @@ Notes:
In previous versions, :meth:`strftime` method was restricted to
years >= 1900.
+ .. versionchanged:: 3.3
+ In version 3.2, :meth:`strftime` method was restricted to
+ years >= 1000.
+
(6)
For example, if :meth:`utcoffset` returns ``timedelta(hours=-3, minutes=-30)``,
``%z`` is replaced with the string ``'-0330'``.
diff --git a/Doc/library/debug.rst b/Doc/library/debug.rst
index b2ee4fa..c69fb1c 100644
--- a/Doc/library/debug.rst
+++ b/Doc/library/debug.rst
@@ -10,7 +10,8 @@ allowing you to identify bottlenecks in your programs.
.. toctree::
bdb.rst
+ faulthandler.rst
pdb.rst
profile.rst
timeit.rst
- trace.rst \ No newline at end of file
+ trace.rst
diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst
index ef8b43f..4a2b2d0 100644
--- a/Doc/library/decimal.rst
+++ b/Doc/library/decimal.rst
@@ -9,6 +9,7 @@
.. moduleauthor:: Raymond Hettinger <python at rcn.com>
.. moduleauthor:: Aahz <aahz at pobox.com>
.. moduleauthor:: Tim Peters <tim.one at comcast.net>
+.. moduleauthor:: Stefan Krah <skrah at bytereef.org>
.. sectionauthor:: Raymond D. Hettinger <python at rcn.com>
.. import modules for testing inline doctests with the Sphinx doctest builder
@@ -20,8 +21,9 @@
# make sure each group gets a fresh context
setcontext(Context())
-The :mod:`decimal` module provides support for decimal floating point
-arithmetic. It offers several advantages over the :class:`float` datatype:
+The :mod:`decimal` module provides support for fast correctly-rounded
+decimal floating point arithmetic. It offers several advantages over the
+:class:`float` datatype:
* Decimal "is based on a floating-point model which was designed with people
in mind, and necessarily has a paramount guiding principle -- computers must
@@ -92,7 +94,7 @@ computation. Depending on the needs of the application, signals may be ignored,
considered as informational, or treated as exceptions. The signals in the
decimal module are: :const:`Clamped`, :const:`InvalidOperation`,
:const:`DivisionByZero`, :const:`Inexact`, :const:`Rounded`, :const:`Subnormal`,
-:const:`Overflow`, and :const:`Underflow`.
+:const:`Overflow`, :const:`Underflow` and :const:`FloatOperation`.
For each signal there is a flag and a trap enabler. When a signal is
encountered, its flag is set to one, then, if the trap enabler is
@@ -122,7 +124,7 @@ precision, rounding, or enabled traps::
>>> from decimal import *
>>> getcontext()
- Context(prec=28, rounding=ROUND_HALF_EVEN, Emin=-999999999, Emax=999999999,
+ Context(prec=28, rounding=ROUND_HALF_EVEN, Emin=-999999, Emax=999999,
capitals=1, clamp=0, flags=[], traps=[Overflow, DivisionByZero,
InvalidOperation])
@@ -132,7 +134,7 @@ Decimal instances can be constructed from integers, strings, floats, or tuples.
Construction from an integer or a float performs an exact conversion of the
value of that integer or float. Decimal numbers include special values such as
:const:`NaN` which stands for "Not a number", positive and negative
-:const:`Infinity`, and :const:`-0`.
+:const:`Infinity`, and :const:`-0`::
>>> getcontext().prec = 28
>>> Decimal(10)
@@ -152,6 +154,25 @@ value of that integer or float. Decimal numbers include special values such as
>>> Decimal('-Infinity')
Decimal('-Infinity')
+If the :exc:`FloatOperation` signal is trapped, accidental mixing of
+decimals and floats in constructors or ordering comparisons raises
+an exception::
+
+ >>> c = getcontext()
+ >>> c.traps[FloatOperation] = True
+ >>> Decimal(3.14)
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ decimal.FloatOperation: [<class 'decimal.FloatOperation'>]
+ >>> Decimal('3.5') < 3.7
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ decimal.FloatOperation: [<class 'decimal.FloatOperation'>]
+ >>> Decimal('3.5') == 3.5
+ True
+
+.. versionadded:: 3.3
+
The significance of a new Decimal is determined solely by the number of digits
input. Context precision and rounding only come into play during arithmetic
operations.
@@ -169,6 +190,16 @@ operations.
>>> Decimal('3.1415926535') + Decimal('2.7182818285')
Decimal('5.85988')
+If the internal limits of the C version are exceeded, constructing
+a decimal raises :class:`InvalidOperation`::
+
+ >>> Decimal("1e9999999999999999999")
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ decimal.InvalidOperation: [<class 'decimal.InvalidOperation'>]
+
+.. versionchanged:: 3.3
+
Decimals interact well with much of the rest of Python. Here is a small decimal
floating point flying circus:
@@ -244,7 +275,7 @@ enabled:
Decimal('0.142857142857142857142857142857142857142857142857142857142857')
>>> ExtendedContext
- Context(prec=9, rounding=ROUND_HALF_EVEN, Emin=-999999999, Emax=999999999,
+ Context(prec=9, rounding=ROUND_HALF_EVEN, Emin=-999999, Emax=999999,
capitals=1, clamp=0, flags=[], traps=[])
>>> setcontext(ExtendedContext)
>>> Decimal(1) / Decimal(7)
@@ -269,7 +300,7 @@ using the :meth:`clear_flags` method. ::
>>> Decimal(355) / Decimal(113)
Decimal('3.14159292')
>>> getcontext()
- Context(prec=9, rounding=ROUND_HALF_EVEN, Emin=-999999999, Emax=999999999,
+ Context(prec=9, rounding=ROUND_HALF_EVEN, Emin=-999999, Emax=999999,
capitals=1, clamp=0, flags=[Inexact, Rounded], traps=[])
The *flags* entry shows that the rational approximation to :const:`Pi` was
@@ -358,6 +389,10 @@ Decimal objects
The argument to the constructor is now permitted to be a :class:`float`
instance.
+ .. versionchanged:: 3.3
+ :class:`float` arguments raise an exception if the :exc:`FloatOperation`
+ trap is set. By default the trap is off.
+
Decimal floating point objects share many properties with the other built-in
numeric types such as :class:`float` and :class:`int`. All of the usual math
operations and special methods apply. Likewise, decimal objects can be
@@ -702,6 +737,11 @@ Decimal objects
resulting exponent is greater than :attr:`Emax` or less than
:attr:`Etiny`.
+ .. deprecated:: 3.3
+ *watchexp* is an implementation detail from the pure Python version
+ and is not present in the C version. It will be removed in version
+ 3.4, where it defaults to ``True``.
+
.. method:: radix()
Return ``Decimal(10)``, the radix (base) in which the :class:`Decimal`
@@ -880,39 +920,33 @@ described below. In addition, the module provides three pre-made contexts:
In single threaded environments, it is preferable to not use this context at
all. Instead, simply create contexts explicitly as described below.
- The default values are precision=28, rounding=ROUND_HALF_EVEN, and enabled traps
- for Overflow, InvalidOperation, and DivisionByZero.
+ The default values are :attr:`prec`\ =\ :const:`28`,
+ :attr:`rounding`\ =\ :const:`ROUND_HALF_EVEN`,
+ and enabled traps for :class:`Overflow`, :class:`InvalidOperation`, and
+ :class:`DivisionByZero`.
In addition to the three supplied contexts, new contexts can be created with the
:class:`Context` constructor.
-.. class:: Context(prec=None, rounding=None, traps=None, flags=None, Emin=None, Emax=None, capitals=None, clamp=None)
+.. class:: Context(prec=None, rounding=None, Emin=None, Emax=None, capitals=None, clamp=None, flags=None, traps=None)
Creates a new context. If a field is not specified or is :const:`None`, the
default values are copied from the :const:`DefaultContext`. If the *flags*
field is not specified or is :const:`None`, all flags are cleared.
- The *prec* field is a positive integer that sets the precision for arithmetic
- operations in the context.
+ *prec* is an integer in the range [:const:`1`, :const:`MAX_PREC`] that sets
+ the precision for arithmetic operations in the context.
- The *rounding* option is one of:
-
- * :const:`ROUND_CEILING` (towards :const:`Infinity`),
- * :const:`ROUND_DOWN` (towards zero),
- * :const:`ROUND_FLOOR` (towards :const:`-Infinity`),
- * :const:`ROUND_HALF_DOWN` (to nearest with ties going towards zero),
- * :const:`ROUND_HALF_EVEN` (to nearest with ties going to nearest even integer),
- * :const:`ROUND_HALF_UP` (to nearest with ties going away from zero), or
- * :const:`ROUND_UP` (away from zero).
- * :const:`ROUND_05UP` (away from zero if last digit after rounding towards zero
- would have been 0 or 5; otherwise towards zero)
+ The *rounding* option is one of the constants listed in the section
+ `Rounding Modes`_.
The *traps* and *flags* fields list any signals to be set. Generally, new
contexts should only set traps and leave the flags clear.
The *Emin* and *Emax* fields are integers specifying the outer limits allowable
- for exponents.
+ for exponents. *Emin* must be in the range [:const:`MIN_EMIN`, :const:`0`],
+ *Emax* in the range [:const:`0`, :const:`MAX_EMAX`].
The *capitals* field is either :const:`0` or :const:`1` (the default). If set to
:const:`1`, exponents are printed with a capital :const:`E`; otherwise, a
@@ -951,6 +985,12 @@ In addition to the three supplied contexts, new contexts can be created with the
Resets all of the flags to :const:`0`.
+ .. method:: clear_traps()
+
+ Resets all of the traps to :const:`0`.
+
+ .. versionadded:: 3.3
+
.. method:: copy()
Return a duplicate of the context.
@@ -1250,8 +1290,13 @@ In addition to the three supplied contexts, new contexts can be created with the
With two arguments, compute ``x**y``. If ``x`` is negative then ``y``
must be integral. The result will be inexact unless ``y`` is integral and
the result is finite and can be expressed exactly in 'precision' digits.
- The result should always be correctly rounded, using the rounding mode of
- the current thread's context.
+ The rounding mode of the context is used. Results are always correctly-rounded
+ in the Python version.
+
+ .. versionchanged:: 3.3
+ The C module computes :meth:`power` in terms of the correctly-rounded
+ :meth:`exp` and :meth:`ln` functions. The result is well-defined but
+ only "almost always correctly-rounded".
With three arguments, compute ``(x**y) % modulo``. For the three argument
form, the following restrictions on the arguments hold:
@@ -1339,6 +1384,69 @@ In addition to the three supplied contexts, new contexts can be created with the
.. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+.. _decimal-rounding-modes:
+
+Constants
+---------
+
+The constants in this section are only relevant for the C module. They
+are also included in the pure Python version for compatibility.
+
++---------------------+---------------------+-------------------------------+
+| | 32-bit | 64-bit |
++=====================+=====================+===============================+
+| .. data:: MAX_PREC | :const:`425000000` | :const:`999999999999999999` |
++---------------------+---------------------+-------------------------------+
+| .. data:: MAX_EMAX | :const:`425000000` | :const:`999999999999999999` |
++---------------------+---------------------+-------------------------------+
+| .. data:: MIN_EMIN | :const:`-425000000` | :const:`-999999999999999999` |
++---------------------+---------------------+-------------------------------+
+| .. data:: MIN_ETINY | :const:`-849999999` | :const:`-1999999999999999997` |
++---------------------+---------------------+-------------------------------+
+
+
+.. data:: HAVE_THREADS
+
+ The default value is True. If Python is compiled without threads, the
+ C version automatically disables the expensive thread local context
+ machinery. In this case, the value is False.
+
+Rounding modes
+--------------
+
+.. data:: ROUND_CEILING
+
+ Round towards :const:`Infinity`.
+
+.. data:: ROUND_DOWN
+
+ Round towards zero.
+
+.. data:: ROUND_FLOOR
+
+ Round towards :const:`-Infinity`.
+
+.. data:: ROUND_HALF_DOWN
+
+ Round to nearest with ties going towards zero.
+
+.. data:: ROUND_HALF_EVEN
+
+ Round to nearest with ties going to nearest even integer.
+
+.. data:: ROUND_HALF_UP
+
+ Round to nearest with ties going away from zero.
+
+.. data:: ROUND_UP
+
+ Round away from zero.
+
+.. data:: ROUND_05UP
+
+ Round away from zero if last digit after rounding towards zero would have
+ been 0 or 5; otherwise round towards zero.
+
.. _decimal-signals:
@@ -1403,7 +1511,6 @@ condition.
Infinity / Infinity
x % 0
Infinity % x
- x._rescale( non-integer )
sqrt(-x) and x > 0
0 ** 0
x ** (non-integer)
@@ -1446,6 +1553,23 @@ condition.
Occurs when a subnormal result is pushed to zero by rounding. :class:`Inexact`
and :class:`Subnormal` are also signaled.
+
+.. class:: FloatOperation
+
+ Enable stricter semantics for mixing floats and Decimals.
+
+ If the signal is not trapped (default), mixing floats and Decimals is
+ permitted in the :class:`~decimal.Decimal` constructor,
+ :meth:`~decimal.Context.create_decimal` and all comparison operators.
+ Both conversion and comparisons are exact. Any occurrence of a mixed
+ operation is silently recorded by setting :exc:`FloatOperation` in the
+ context flags. Explicit conversions with :meth:`~decimal.Decimal.from_float`
+ or :meth:`~decimal.Context.create_decimal_from_float` do not set the flag.
+
+ Otherwise (the signal is trapped), only equality comparisons and explicit
+ conversions are silent. All other mixed operations raise :exc:`FloatOperation`.
+
+
The following table summarizes the hierarchy of signals::
exceptions.ArithmeticError(exceptions.Exception)
@@ -1458,10 +1582,12 @@ The following table summarizes the hierarchy of signals::
InvalidOperation
Rounded
Subnormal
+ FloatOperation(DecimalException, exceptions.TypeError)
.. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
.. _decimal-notes:
Floating Point Notes
@@ -1571,7 +1697,7 @@ normalized floating point representations, it is not immediately obvious that
the following calculation returns a value equal to zero:
>>> 1 / Decimal('Infinity')
- Decimal('0E-1000000026')
+ Decimal('0E-1000026')
.. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -1583,7 +1709,7 @@ Working with threads
The :func:`getcontext` function accesses a different :class:`Context` object for
each thread. Having separate thread contexts means that threads may make
-changes (such as ``getcontext.prec=10``) without interfering with other threads.
+changes (such as ``getcontext().prec=10``) without interfering with other threads.
Likewise, the :func:`setcontext` function automatically assigns its target to
the current thread.
diff --git a/Doc/library/depgraph-output.png b/Doc/library/depgraph-output.png
new file mode 100644
index 0000000..960bb1b
--- /dev/null
+++ b/Doc/library/depgraph-output.png
Binary files differ
diff --git a/Doc/library/development.rst b/Doc/library/development.rst
index c822e08..2368769 100644
--- a/Doc/library/development.rst
+++ b/Doc/library/development.rst
@@ -19,5 +19,8 @@ The list of modules described in this chapter is:
pydoc.rst
doctest.rst
unittest.rst
+ unittest.mock.rst
+ unittest.mock-examples.rst
2to3.rst
test.rst
+ venv.rst
diff --git a/Doc/library/difflib.rst b/Doc/library/difflib.rst
index bdc37b3..836e240 100644
--- a/Doc/library/difflib.rst
+++ b/Doc/library/difflib.rst
@@ -752,8 +752,8 @@ It is also contained in the Python source distribution, as
# we're passing these as arguments to the diff function
fromdate = time.ctime(os.stat(fromfile).st_mtime)
todate = time.ctime(os.stat(tofile).st_mtime)
- fromlines = open(fromfile, 'U').readlines()
- tolines = open(tofile, 'U').readlines()
+ with open(fromfile) as fromf, open(tofile) as tof:
+ fromlines, tolines = list(fromf), list(tof)
if options.u:
diff = difflib.unified_diff(fromlines, tolines, fromfile, tofile,
diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst
index 108cda7..854c521 100644
--- a/Doc/library/dis.rst
+++ b/Doc/library/dis.rst
@@ -171,11 +171,6 @@ The Python compiler currently generates the following bytecode instructions.
**General instructions**
-.. opcode:: STOP_CODE
-
- Indicates end-of-code to the compiler, not used by the interpreter.
-
-
.. opcode:: NOP
Do nothing code. Used as a placeholder by the bytecode optimizer.
@@ -436,6 +431,13 @@ the stack so that it is available for further iterations of the loop.
Pops ``TOS`` and yields it from a :term:`generator`.
+.. opcode:: YIELD_FROM
+
+ Pops ``TOS`` and delegates to it as a subiterator from a :term:`generator`.
+
+ .. versionadded:: 3.3
+
+
.. opcode:: IMPORT_STAR
Loads all symbols not starting with ``'_'`` directly from the module TOS to the
@@ -752,9 +754,10 @@ the more significant byte last.
.. opcode:: MAKE_FUNCTION (argc)
- Pushes a new function object on the stack. TOS is the code associated with the
- function. The function object is defined to have *argc* default parameters,
- which are found below TOS.
+ Pushes a new function object on the stack. TOS is the
+ :term:`qualified name` of the function; TOS1 is the code associated with
+ the function. The function object is defined to have *argc* default parameters,
+ which are found below TOS1.
.. opcode:: MAKE_CLOSURE (argc)
diff --git a/Doc/library/distutils.rst b/Doc/library/distutils.rst
index 238b79d..11a2949 100644
--- a/Doc/library/distutils.rst
+++ b/Doc/library/distutils.rst
@@ -12,18 +12,14 @@ additional modules into a Python installation. The new modules may be either
100%-pure Python, or may be extension modules written in C, or may be
collections of Python packages which include modules coded in both Python and C.
-This package is discussed in two separate chapters:
+User documentation and API reference are provided in another document:
.. seealso::
:ref:`distutils-index`
The manual for developers and packagers of Python modules. This describes
how to prepare :mod:`distutils`\ -based packages so that they may be
- easily installed into an existing Python installation.
-
- :ref:`install-index`
- An "administrators" manual which includes information on installing
- modules into an existing Python installation. You do not need to be a
- Python programmer to read this manual.
-
+ easily installed into an existing Python installation. It also contains
+ instructions for end-users wanting to install a distutils-based package,
+ :ref:`install-index`.
diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst
index ec8edbe..222c719 100644
--- a/Doc/library/doctest.rst
+++ b/Doc/library/doctest.rst
@@ -320,7 +320,8 @@ The fine print:
Tabs in output generated by the tested code are not modified. Because any
hard tabs in the sample output *are* expanded, this means that if the code
output includes hard tabs, the only way the doctest can pass is if the
- :const:`NORMALIZE_WHITESPACE` option or directive is in effect.
+ :const:`NORMALIZE_WHITESPACE` option or :ref:`directive <doctest-directives>`
+ is in effect.
Alternatively, the test can be rewritten to capture the output and compare it
to an expected value as part of the test. This handling of tabs in the
source was arrived at through trial and error, and has proven to be the least
@@ -485,15 +486,16 @@ Some details you should read once, but won't need to remember:
SyntaxError: invalid syntax
+.. _option-flags-and-directives:
.. _doctest-options:
-Option Flags and Directives
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Option Flags
+^^^^^^^^^^^^
A number of option flags control various aspects of doctest's behavior.
Symbolic names for the flags are supplied as module constants, which can be
or'ed together and passed to various functions. The names can also be used in
-doctest directives (see below).
+:ref:`doctest directives <doctest-directives>`.
The first group of options define test semantics, controlling aspects of how
doctest decides whether actual output matches an example's expected output:
@@ -547,14 +549,14 @@ doctest decides whether actual output matches an example's expected output:
:exc:`TypeError` is raised.
It will also ignore the module name used in Python 3 doctest reports. Hence
- both these variations will work regardless of whether the test is run under
- Python 2.7 or Python 3.2 (or later versions):
+ both of these variations will work with the flag specified, regardless of
+ whether the test is run under Python 2.7 or Python 3.2 (or later versions)::
- >>> raise CustomError('message') #doctest: +IGNORE_EXCEPTION_DETAIL
+ >>> raise CustomError('message')
Traceback (most recent call last):
CustomError: message
- >>> raise CustomError('message') #doctest: +IGNORE_EXCEPTION_DETAIL
+ >>> raise CustomError('message')
Traceback (most recent call last):
my_module.CustomError: message
@@ -564,15 +566,16 @@ doctest decides whether actual output matches an example's expected output:
exception name. Using :const:`IGNORE_EXCEPTION_DETAIL` and the details
from Python 2.3 is also the only clear way to write a doctest that doesn't
care about the exception detail yet continues to pass under Python 2.3 or
- earlier (those releases do not support doctest directives and ignore them
- as irrelevant comments). For example, ::
+ earlier (those releases do not support :ref:`doctest directives
+ <doctest-directives>` and ignore them as irrelevant comments). For example::
- >>> (1, 2)[3] = 'moo' #doctest: +IGNORE_EXCEPTION_DETAIL
+ >>> (1, 2)[3] = 'moo'
Traceback (most recent call last):
File "<stdin>", line 1, in ?
TypeError: object doesn't support item assignment
- passes under Python 2.3 and later Python versions, even though the detail
+ passes under Python 2.3 and later Python versions with the flag specified,
+ even though the detail
changed in Python 2.4 to say "does not" instead of "doesn't".
.. versionchanged:: 3.2
@@ -634,9 +637,30 @@ The second group of options controls how test failures are reported:
A bitmask or'ing together all the reporting flags above.
-"Doctest directives" may be used to modify the option flags for individual
-examples. Doctest directives are expressed as a special Python comment
-following an example's source code:
+
+There is also a way to register new option flag names, though this isn't
+useful unless you intend to extend :mod:`doctest` internals via subclassing:
+
+
+.. function:: register_optionflag(name)
+
+ Create a new option flag with a given name, and return the new flag's integer
+ value. :func:`register_optionflag` can be used when subclassing
+ :class:`OutputChecker` or :class:`DocTestRunner` to create new options that are
+ supported by your subclasses. :func:`register_optionflag` should always be
+ called using the following idiom::
+
+ MY_FLAG = register_optionflag('MY_FLAG')
+
+
+.. _doctest-directives:
+
+Directives
+^^^^^^^^^^
+
+Doctest directives may be used to modify the :ref:`option flags
+<doctest-options>` for an individual example. Doctest directives are
+special Python comments following an example's source code:
.. productionlist:: doctest
directive: "#" "doctest:" `directive_options`
@@ -693,20 +717,6 @@ usually the only meaningful choice. However, option flags can also be passed to
functions that run doctests, establishing different defaults. In such cases,
disabling an option via ``-`` in a directive can be useful.
-There's also a way to register new option flag names, although this isn't useful
-unless you intend to extend :mod:`doctest` internals via subclassing:
-
-
-.. function:: register_optionflag(name)
-
- Create a new option flag with a given name, and return the new flag's integer
- value. :func:`register_optionflag` can be used when subclassing
- :class:`OutputChecker` or :class:`DocTestRunner` to create new options that are
- supported by your subclasses. :func:`register_optionflag` should always be
- called using the following idiom::
-
- MY_FLAG = register_optionflag('MY_FLAG')
-
.. _doctest-warnings:
diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst
index d8f330f..9f0a9e2 100644
--- a/Doc/library/email.errors.rst
+++ b/Doc/library/email.errors.rst
@@ -73,17 +73,38 @@ this class is *not* an exception!
* :class:`StartBoundaryNotFoundDefect` -- The start boundary claimed in the
:mailheader:`Content-Type` header was never found.
+* :class:`CloseBoundaryNotFoundDefect` -- A start boundary was found, but
+ no corresponding close boundary was ever found.
+
+ .. versionadded:: 3.3
+
* :class:`FirstHeaderLineIsContinuationDefect` -- The message had a continuation
line as its first header line.
* :class:`MisplacedEnvelopeHeaderDefect` - A "Unix From" header was found in the
middle of a header block.
+* :class:`MissingHeaderBodySeparatorDefect` - A line was found while parsing
+ headers that had no leading white space but contained no ':'. Parsing
+ continues assuming that the line represents the first line of the body.
+
+ .. versionadded:: 3.3
+
* :class:`MalformedHeaderDefect` -- A header was found that was missing a colon,
or was otherwise malformed.
+ .. deprecated:: 3.3
+ This defect has not been used for several Python versions.
+
* :class:`MultipartInvariantViolationDefect` -- A message claimed to be a
:mimetype:`multipart`, but no subparts were found. Note that when a message has
this defect, its :meth:`is_multipart` method may return false even though its
content type claims to be :mimetype:`multipart`.
+* :class:`InvalidBase64PaddingDefect` -- When decoding a block of base64
+ encoded bytes, the padding was not correct. Enough padding is added to
+ perform the decode, but the resulting decoded bytes may be invalid.
+
+* :class:`InvalidBase64CharactersDefect` -- When decoding a block of base64
+ encoded bytes, characters outside the base64 alphabet were encountered.
+ The characters are ignored, but the resulting decoded bytes may be invalid.
diff --git a/Doc/library/email.generator.rst b/Doc/library/email.generator.rst
index 88c62a2..c172acb 100644
--- a/Doc/library/email.generator.rst
+++ b/Doc/library/email.generator.rst
@@ -32,7 +32,7 @@ Here are the public methods of the :class:`Generator` class, imported from the
:mod:`email.generator` module:
-.. class:: Generator(outfp, mangle_from_=True, maxheaderlen=78)
+.. class:: Generator(outfp, mangle_from_=True, maxheaderlen=78, *, policy=None)
The constructor for the :class:`Generator` class takes a :term:`file-like object`
called *outfp* for an argument. *outfp* must support the :meth:`write` method
@@ -53,10 +53,17 @@ Here are the public methods of the :class:`Generator` class, imported from the
:class:`~email.header.Header` class. Set to zero to disable header wrapping.
The default is 78, as recommended (but not required) by :rfc:`2822`.
+ The *policy* keyword specifies a :mod:`~email.policy` object that controls a
+ number of aspects of the generator's operation. If no *policy* is specified,
+ then the *policy* attached to the message object passed to :attr:`flatten`
+ is used.
+
+ .. versionchanged:: 3.3 Added the *policy* keyword.
+
The other public :class:`Generator` methods are:
- .. method:: flatten(msg, unixfrom=False, linesep='\\n')
+ .. method:: flatten(msg, unixfrom=False, linesep=None)
Print the textual representation of the message object structure rooted at
*msg* to the output file specified when the :class:`Generator` instance
@@ -72,19 +79,20 @@ Here are the public methods of the :class:`Generator` class, imported from the
Note that for subparts, no envelope header is ever printed.
Optional *linesep* specifies the line separator character used to
- terminate lines in the output. It defaults to ``\n`` because that is
- the most useful value for Python application code (other library packages
- expect ``\n`` separated lines). ``linesep=\r\n`` can be used to
- generate output with RFC-compliant line separators.
+ terminate lines in the output. If specified it overrides the value
+ specified by the *msg*\'s or ``Generator``\'s ``policy``.
- Messages parsed with a Bytes parser that have a
- :mailheader:`Content-Transfer-Encoding` of 8bit will be converted to a
- use a 7bit Content-Transfer-Encoding. Non-ASCII bytes in the headers
- will be :rfc:`2047` encoded with a charset of `unknown-8bit`.
+ Because strings cannot represent non-ASCII bytes, if the policy that
+ applies when ``flatten`` is run has :attr:`~email.policy.Policy.cte_type`
+ set to ``8bit``, ``Generator`` will operate as if it were set to
+ ``7bit``. This means that messages parsed with a Bytes parser that have
+ a :mailheader:`Content-Transfer-Encoding` of ``8bit`` will be converted
+ to use a ``7bit`` Content-Transfer-Encoding. Non-ASCII bytes in the
+ headers will be :rfc:`2047` encoded with a charset of ``unknown-8bit``.
.. versionchanged:: 3.2
- Added support for re-encoding 8bit message bodies, and the *linesep*
- argument.
+ Added support for re-encoding ``8bit`` message bodies, and the
+ *linesep* argument.
.. method:: clone(fp)
@@ -103,7 +111,8 @@ As a convenience, see the :class:`~email.message.Message` methods
formatted string representation of a message object. For more detail, see
:mod:`email.message`.
-.. class:: BytesGenerator(outfp, mangle_from_=True, maxheaderlen=78)
+.. class:: BytesGenerator(outfp, mangle_from_=True, maxheaderlen=78, *, \
+ policy=policy.default)
The constructor for the :class:`BytesGenerator` class takes a binary
:term:`file-like object` called *outfp* for an argument. *outfp* must
@@ -125,19 +134,31 @@ formatted string representation of a message object. For more detail, see
wrapping. The default is 78, as recommended (but not required) by
:rfc:`2822`.
+ The *policy* keyword specifies a :mod:`~email.policy` object that controls a
+ number of aspects of the generator's operation. The default policy
+ maintains backward compatibility.
+
+ .. versionchanged:: 3.3 Added the *policy* keyword.
+
The other public :class:`BytesGenerator` methods are:
- .. method:: flatten(msg, unixfrom=False, linesep='\n')
+ .. method:: flatten(msg, unixfrom=False, linesep=None)
Print the textual representation of the message object structure rooted
at *msg* to the output file specified when the :class:`BytesGenerator`
instance was created. Subparts are visited depth-first and the resulting
- text will be properly MIME encoded. If the input that created the *msg*
- contained bytes with the high bit set and those bytes have not been
- modified, they will be copied faithfully to the output, even if doing so
- is not strictly RFC compliant. (To produce strictly RFC compliant
- output, use the :class:`Generator` class.)
+ text will be properly MIME encoded. If the :mod:`~email.policy` option
+ :attr:`~email.policy.Policy.cte_type` is ``8bit`` (the default),
+ then any bytes with the high bit set in the original parsed message that
+ have not been modified will be copied faithfully to the output. If
+ ``cte_type`` is ``7bit``, the bytes will be converted as needed
+ using an ASCII-compatible Content-Transfer-Encoding. In particular,
+ RFC-invalid non-ASCII bytes in headers will be encoded using the MIME
+ ``unknown-8bit`` character set, thus rendering them RFC-compliant.
+
+ .. XXX: There should be a complementary option that just does the RFC
+ compliance transformation but leaves CTE 8bit parts alone.
Messages parsed with a Bytes parser that have a
:mailheader:`Content-Transfer-Encoding` of 8bit will be reconstructed
@@ -152,10 +173,8 @@ formatted string representation of a message object. For more detail, see
Note that for subparts, no envelope header is ever printed.
Optional *linesep* specifies the line separator character used to
- terminate lines in the output. It defaults to ``\n`` because that is
- the most useful value for Python application code (other library packages
- expect ``\n`` separated lines). ``linesep=\r\n`` can be used to
- generate output with RFC-compliant line separators.
+ terminate lines in the output. If specified it overrides the value
+ specified by the ``Generator``\ 's ``policy``.
.. method:: clone(fp)
diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst
index 7d2dc2e..346d23f 100644
--- a/Doc/library/email.header.rst
+++ b/Doc/library/email.header.rst
@@ -31,8 +31,8 @@ For example::
>>> msg = Message()
>>> h = Header('p\xf6stal', 'iso-8859-1')
>>> msg['Subject'] = h
- >>> print(msg.as_string())
- Subject: =?iso-8859-1?q?p=F6stal?=
+ >>> msg.as_string()
+ 'Subject: =?iso-8859-1?q?p=F6stal?=\n\n'
@@ -176,7 +176,7 @@ The :mod:`email.header` module also provides the following convenient functions.
>>> from email.header import decode_header
>>> decode_header('=?iso-8859-1?q?p=F6stal?=')
- [('p\xf6stal', 'iso-8859-1')]
+ [(b'p\xf6stal', 'iso-8859-1')]
.. function:: make_header(decoded_seq, maxlinelen=None, header_name=None, continuation_ws=' ')
diff --git a/Doc/library/email.headerregistry.rst b/Doc/library/email.headerregistry.rst
new file mode 100644
index 0000000..c884159
--- /dev/null
+++ b/Doc/library/email.headerregistry.rst
@@ -0,0 +1,452 @@
+:mod:`email.headerregistry`: Custom Header Objects
+--------------------------------------------------
+
+.. module:: email.headerregistry
+ :synopsis: Automatic Parsing of headers based on the field name
+
+.. moduleauthor:: R. David Murray <rdmurray@bitdance.com>
+.. sectionauthor:: R. David Murray <rdmurray@bitdance.com>
+
+
+.. note::
+
+ The headerregistry module has been included in the standard library on a
+ :term:`provisional basis <provisional package>`. Backwards incompatible
+ changes (up to and including removal of the module) may occur if deemed
+ necessary by the core developers.
+
+.. versionadded:: 3.3
+ as a :term:`provisional module <provisional package>`.
+
+Headers are represented by customized subclasses of :class:`str`. The
+particular class used to represent a given header is determined by the
+:attr:`~email.policy.EmailPolicy.header_factory` of the :mod:`~email.policy` in
+effect when the headers are created. This section documents the particular
+``header_factory`` implemented by the email package for handling :RFC:`5322`
+compliant email messages, which not only provides customized header objects for
+various header types, but also provides an extension mechanism for applications
+to add their own custom header types.
+
+When using any of the policy objects derived from
+:data:`~email.policy.EmailPolicy`, all headers are produced by
+:class:`.HeaderRegistry` and have :class:`.BaseHeader` as their last base
+class. Each header class has an additional base class that is determined by
+the type of the header. For example, many headers have the class
+:class:`.UnstructuredHeader` as their other base class. The specialized second
+class for a header is determined by the name of the header, using a lookup
+table stored in the :class:`.HeaderRegistry`. All of this is managed
+transparently for the typical application program, but interfaces are provided
+for modifying the default behavior for use by more complex applications.
+
+The sections below first document the header base classes and their attributes,
+followed by the API for modifying the behavior of :class:`.HeaderRegistry`, and
+finally the support classes used to represent the data parsed from structured
+headers.
+
+
+.. class:: BaseHeader(name, value)
+
+ *name* and *value* are passed to ``BaseHeader`` from the
+ :attr:`~email.policy.EmailPolicy.header_factory` call. The string value of
+ any header object is the *value* fully decoded to unicode.
+
+ This base class defines the following read-only properties:
+
+
+ .. attribute:: name
+
+ The name of the header (the portion of the field before the ':'). This
+ is exactly the value passed in the :attr:`~EmailPolicy.header_factory`
+ call for *name*; that is, case is preserved.
+
+
+ .. attribute:: defects
+
+ A tuple of :exc:`~email.errors.HeaderDefect` instances reporting any
+ RFC compliance problems found during parsing. The email package tries to
+ be complete about detecting compliance issues. See the :mod:`~email.errors`
+ module for a discussion of the types of defects that may be reported.
+
+
+ .. attribute:: max_count
+
+ The maximum number of headers of this type that can have the same
+ ``name``. A value of ``None`` means unlimited. The ``BaseHeader`` value
+ for this attribute is ``None``; it is expected that specialized header
+ classes will override this value as needed.
+
+ ``BaseHeader`` also provides the following method, which is called by the
+ email library code and should not in general be called by application
+ programs:
+
+ .. method:: fold(*, policy)
+
+ Return a string containing :attr:`~email.policy.Policy.linesep`
+ characters as required to correctly fold the header according
+ to *policy*. A :attr:`~email.policy.Policy.cte_type` of
+ ``8bit`` will be treated as if it were ``7bit``, since strings
+ may not contain binary data.
+
+
+ ``BaseHeader`` by itself cannot be used to create a header object. It
+ defines a protocol that each specialized header cooperates with in order to
+ produce the header object. Specifically, ``BaseHeader`` requires that
+ the specialized class provide a :func:`classmethod` named ``parse``. This
+ method is called as follows::
+
+ parse(string, kwds)
+
+ ``kwds`` is a dictionary containing one pre-initialized key, ``defects``.
+ ``defects`` is an empty list. The parse method should append any detected
+ defects to this list. On return, the ``kwds`` dictionary *must* contain
+ values for at least the keys ``decoded`` and ``defects``. ``decoded``
+ should be the string value for the header (that is, the header value fully
+ decoded to unicode). The parse method should assume that *string* may
+ contain transport encoded parts, but should correctly handle all valid
+ unicode characters as well so that it can parse un-encoded header values.
+
+ ``BaseHeader``'s ``__new__`` then creates the header instance, and calls its
+ ``init`` method. The specialized class only needs to provide an ``init``
+ method if it wishes to set additional attributes beyond those provided by
+ ``BaseHeader`` itself. Such an ``init`` method should look like this::
+
+ def init(self, *args, **kw):
+ self._myattr = kw.pop('myattr')
+ super().init(*args, **kw)
+
+ That is, anything extra that the specialized class puts in to the ``kwds``
+ dictionary should be removed and handled, and the remaining contents of
+ ``kw`` (and ``args``) passed to the ``BaseHeader`` ``init`` method.
+
+
+.. class:: UnstructuredHeader
+
+ An "unstructured" header is the default type of header in :rfc:`5322`.
+ Any header that does not have a specified syntax is treated as
+ unstructured. The classic example of an unstructured header is the
+ :mailheader:`Subject` header.
+
+ In :rfc:`5322`, an unstructured header is a run of arbitrary text in the
+ ASCII character set. :rfc:`2047`, however, has an :rfc:`5322` compatible
+ mechanism for encoding non-ASCII text as ASCII characters within a header
+ value. When a *value* containing encoded words is passed to the
+ constructor, the ``UnstructuredHeader`` parser converts such encoded words
+ back in to the original unicode, following the :rfc:`2047` rules for
+ unstructured text. The parser uses heuristics to attempt to decode certain
+ non-compliant encoded words. Defects are registered in such cases, as well
+ as defects for issues such as invalid characters within the encoded words or
+ the non-encoded text.
+
+ This header type provides no additional attributes.
+
+
+.. class:: DateHeader
+
+ :rfc:`5322` specifies a very specific format for dates within email headers.
+ The ``DateHeader`` parser recognizes that date format, as well as
+ recognizing a number of variant forms that are sometimes found "in the
+ wild".
+
+ This header type provides the following additional attributes:
+
+ .. attribute:: datetime
+
+ If the header value can be recognized as a valid date of one form or
+ another, this attribute will contain a :class:`~datetime.datetime`
+ instance representing that date. If the timezone of the input date is
+ specified as ``-0000`` (indicating it is in UTC but contains no
+ information about the source timezone), then :attr:`.datetime` will be a
+ naive :class:`~datetime.datetime`. If a specific timezone offset is
+ found (including ``+0000``), then :attr:`.datetime` will contain an aware
+ ``datetime`` that uses :class:`datetime.timezone` to record the timezone
+ offset.
+
+ The ``decoded`` value of the header is determined by formatting the
+ ``datetime`` according to the :rfc:`5322` rules; that is, it is set to::
+
+ email.utils.format_datetime(self.datetime)
+
+ When creating a ``DateHeader``, *value* may be a
+ :class:`~datetime.datetime` instance. This means, for example, that
+ the following code is valid and does what one would expect::
+
+ msg['Date'] = datetime(2011, 7, 15, 21)
+
+ Because this is a naive ``datetime`` it will be interpreted as a UTC
+ timestamp, and the resulting value will have a timezone of ``-0000``. Much
+ more useful is to use the :func:`~email.utils.localtime` function from the
+ :mod:`~email.utils` module::
+
+ msg['Date'] = utils.localtime()
+
+ This example sets the date header to the current time and date using
+ the current timezone offset.
+
+
+.. class:: AddressHeader
+
+ Address headers are one of the most complex structured header types.
+ The ``AddressHeader`` class provides a generic interface to any address
+ header.
+
+ This header type provides the following additional attributes:
+
+
+ .. attribute:: groups
+
+ A tuple of :class:`.Group` objects encoding the
+ addresses and groups found in the header value. Addresses that are
+ not part of a group are represented in this list as single-address
+ ``Groups`` whose :attr:`~.Group.display_name` is ``None``.
+
+
+ .. attribute:: addresses
+
+ A tuple of :class:`.Address` objects encoding all
+ of the individual addresses from the header value. If the header value
+ contains any groups, the individual addresses from the group are included
+ in the list at the point where the group occurs in the value (that is,
+ the list of addresses is "flattened" into a one dimensional list).
+
+ The ``decoded`` value of the header will have all encoded words decoded to
+ unicode. :class:`~encodings.idna` encoded domain names are also decoded to unicode. The
+ ``decoded`` value is set by :attr:`~str.join`\ ing the :class:`str` value of
+ the elements of the ``groups`` attribute with ``', '``.
+
+ A list of :class:`.Address` and :class:`.Group` objects in any combination
+ may be used to set the value of an address header. ``Group`` objects whose
+ ``display_name`` is ``None`` will be interpreted as single addresses, which
+ allows an address list to be copied with groups intact by using the list
+ obtained from the ``groups`` attribute of the source header.
+
+
+.. class:: SingleAddressHeader
+
+ A subclass of :class:`.AddressHeader` that adds one
+ additional attribute:
+
+
+ .. attribute:: address
+
+ The single address encoded by the header value. If the header value
+ actually contains more than one address (which would be a violation of
+ the RFC under the default :mod:`policy`), accessing this attribute will
+ result in a :exc:`ValueError`.
+
+
+Many of the above classes also have a ``Unique`` variant (for example,
+``UniqueUnstructuredHeader``). The only difference is that in the ``Unique``
+variant, :attr:`~.BaseHeader.max_count` is set to 1.
+
+
+.. class:: MIMEVersionHeader
+
+ There is really only one valid value for the :mailheader:`MIME-Version`
+ header, and that is ``1.0``. For future proofing, this header class
+ supports other valid version numbers. If a version number has a valid value
+ per :rfc:`2045`, then the header object will have non-``None`` values for
+ the following attributes:
+
+ .. attribute:: version
+
+ The version number as a string, with any whitespace and/or comments
+ removed.
+
+ .. attribute:: major
+
+ The major version number as an integer
+
+ .. attribute:: minor
+
+ The minor version number as an integer
+
+
+.. class:: ParameterizedMIMEHeader
+
+ MIME headers all start with the prefix 'Content-'. Each specific header has
+ a certain value, described under the class for that header. Some can
+ also take a list of supplemental parameters, which have a common format.
+ This class serves as a base for all the MIME headers that take parameters.
+
+ .. attribute:: params
+
+ A dictionary mapping parameter names to parameter values.
+
+
+.. class:: ContentTypeHeader
+
+ A :class:`ParameterizedMIMEHeader` class that handles the
+ :mailheader:`Content-Type` header.
+
+ .. attribute:: content_type
+
+ The content type string, in the form ``maintype/subtype``.
+
+ .. attribute:: maintype
+
+ .. attribute:: subtype
+
+
+.. class:: ContentDispositionHeader
+
+ A :class:`ParameterizedMIMEHeader` class that handles the
+ :mailheader:`Content-Disposition` header.
+
+ .. attribute:: content_disposition
+
+ ``inline`` and ``attachment`` are the only valid values in common use.
+
+
+.. class:: ContentTransferEncoding
+
+ Handles the :mailheader:`Content-Transfer-Encoding` header.
+
+ .. attribute:: cte
+
+ Valid values are ``7bit``, ``8bit``, ``base64``, and
+ ``quoted-printable``. See :rfc:`2045` for more information.
+
+
+
+.. class:: HeaderRegistry(base_class=BaseHeader, \
+ default_class=UnstructuredHeader, \
+ use_default_map=True)
+
+ This is the factory used by :class:`~email.policy.EmailPolicy` by default.
+ ``HeaderRegistry`` builds the class used to create a header instance
+ dynamically, using *base_class* and a specialized class retrieved from a
+ registry that it holds. When a given header name does not appear in the
+ registry, the class specified by *default_class* is used as the specialized
+ class. When *use_default_map* is ``True`` (the default), the standard
+ mapping of header names to classes is copied in to the registry during
+ initialization. *base_class* is always the last class in the generated
+ class's ``__bases__`` list.
+
+ The default mappings are:
+
+ :subject: UniqueUnstructuredHeader
+ :date: UniqueDateHeader
+ :resent-date: DateHeader
+ :orig-date: UniqueDateHeader
+ :sender: UniqueSingleAddressHeader
+ :resent-sender: SingleAddressHeader
+ :to: UniqueAddressHeader
+ :resent-to: AddressHeader
+ :cc: UniqueAddressHeader
+ :resent-cc: AddressHeader
+ :from: UniqueAddressHeader
+ :resent-from: AddressHeader
+ :reply-to: UniqueAddressHeader
+
+ ``HeaderRegistry`` has the following methods:
+
+
+ .. method:: map_to_type(self, name, cls)
+
+ *name* is the name of the header to be mapped. It will be converted to
+ lower case in the registry. *cls* is the specialized class to be used,
+ along with *base_class*, to create the class used to instantiate headers
+ that match *name*.
+
+
+ .. method:: __getitem__(name)
+
+ Construct and return a class to handle creating a *name* header.
+
+
+ .. method:: __call__(name, value)
+
+ Retrieves the specialized header associated with *name* from the
+ registry (using *default_class* if *name* does not appear in the
+ registry) and composes it with *base_class* to produce a class,
+ calls the constructed class's constructor, passing it the same
+ argument list, and finally returns the class instance created thereby.
+
+
+The following classes are the classes used to represent data parsed from
+structured headers and can, in general, be used by an application program to
+construct structured values to assign to specific headers.
+
+
+.. class:: Address(display_name='', username='', domain='', addr_spec=None)
+
+ The class used to represent an email address. The general form of an
+ address is::
+
+ [display_name] <username@domain>
+
+ or::
+
+ username@domain
+
+ where each part must conform to specific syntax rules spelled out in
+ :rfc:`5322`.
+
+ As a convenience *addr_spec* can be specified instead of *username* and
+ *domain*, in which case *username* and *domain* will be parsed from the
+ *addr_spec*. An *addr_spec* must be a properly RFC quoted string; if it is
+ not ``Address`` will raise an error. Unicode characters are allowed and
+ will be properly encoded when serialized. However, per the RFCs, unicode is
+ *not* allowed in the username portion of the address.
+
+ .. attribute:: display_name
+
+ The display name portion of the address, if any, with all quoting
+ removed. If the address does not have a display name, this attribute
+ will be an empty string.
+
+ .. attribute:: username
+
+ The ``username`` portion of the address, with all quoting removed.
+
+ .. attribute:: domain
+
+ The ``domain`` portion of the address.
+
+ .. attribute:: addr_spec
+
+ The ``username@domain`` portion of the address, correctly quoted
+ for use as a bare address (the second form shown above). This
+ attribute is not mutable.
+
+ .. method:: __str__()
+
+ The ``str`` value of the object is the address quoted according to
+ :rfc:`5322` rules, but with no Content Transfer Encoding of any non-ASCII
+ characters.
+
+ To support SMTP (:rfc:`5321`), ``Address`` handles one special case: if
+ ``username`` and ``domain`` are both the empty string (or ``None``), then
+ the string value of the ``Address`` is ``<>``.
+
+
+.. class:: Group(display_name=None, addresses=None)
+
+ The class used to represent an address group. The general form of an
+ address group is::
+
+ display_name: [address-list];
+
+ As a convenience for processing lists of addresses that consist of a mixture
+ of groups and single addresses, a ``Group`` may also be used to represent
+ single addresses that are not part of a group by setting *display_name* to
+ ``None`` and providing a list of the single address as *addresses*.
+
+ .. attribute:: display_name
+
+ The ``display_name`` of the group. If it is ``None`` and there is
+ exactly one ``Address`` in ``addresses``, then the ``Group`` represents a
+ single address that is not in a group.
+
+ .. attribute:: addresses
+
+ A possibly empty tuple of :class:`.Address` objects representing the
+ addresses in the group.
+
+ .. method:: __str__()
+
+ The ``str`` value of a ``Group`` is formatted according to :rfc:`5322`,
+ but with no Content Transfer Encoding of any non-ASCII characters. If
+ ``display_name`` is ``None`` and there is a single ``Address`` in the
+ ``addresses`` list, the ``str`` value will be the same as the ``str`` of
+ that single ``Address``.
diff --git a/Doc/library/email.message.rst b/Doc/library/email.message.rst
index f685e54..59ab47d 100644
--- a/Doc/library/email.message.rst
+++ b/Doc/library/email.message.rst
@@ -111,10 +111,14 @@ Here are the methods of the :class:`Message` class:
header. When ``True`` and the message is not a multipart, the payload will
be decoded if this header's value is ``quoted-printable`` or ``base64``.
If some other encoding is used, or :mailheader:`Content-Transfer-Encoding`
- header is missing, or if the payload has bogus base64 data, the payload is
+ header is missing, the payload is
returned as-is (undecoded). In all cases the returned value is binary
data. If the message is a multipart and the *decode* flag is ``True``,
- then ``None`` is returned.
+ then ``None`` is returned. If the payload is base64 and it was not
+ perfectly formed (missing padding, characters outside the base64
+ alphabet), then an appropriate defect will be added to the message's
+ defect property (:class:`~email.errors.InvalidBase64PaddingDefect` or
+ :class:`~email.errors.InvalidBase64CharactersDefect`, respectively).
When *decode* is ``False`` (the default) the body is returned as a string
without decoding the :mailheader:`Content-Transfer-Encoding`. However,
diff --git a/Doc/library/email.mime.rst b/Doc/library/email.mime.rst
index ae340f7..db5584c 100644
--- a/Doc/library/email.mime.rst
+++ b/Doc/library/email.mime.rst
@@ -175,7 +175,7 @@ Here are the classes:
.. currentmodule:: email.mime.text
-.. class:: MIMEText(_text, _subtype='plain', _charset='us-ascii')
+.. class:: MIMEText(_text, _subtype='plain', _charset=None)
Module: :mod:`email.mime.text`
@@ -185,5 +185,5 @@ Here are the classes:
minor type and defaults to :mimetype:`plain`. *_charset* is the character
set of the text and is passed as a parameter to the
:class:`~email.mime.nonmultipart.MIMENonMultipart` constructor; it defaults
- to ``us-ascii``. No guessing or encoding is performed on the text data.
-
+ to ``us-ascii`` if the string contains only ``ascii`` codepoints, and
+ ``utf-8`` otherwise.
diff --git a/Doc/library/email.parser.rst b/Doc/library/email.parser.rst
index 49a59c0..6a43561 100644
--- a/Doc/library/email.parser.rst
+++ b/Doc/library/email.parser.rst
@@ -58,12 +58,18 @@ list of defects that it can find.
Here is the API for the :class:`FeedParser`:
-.. class:: FeedParser(_factory=email.message.Message)
+.. class:: FeedParser(_factory=email.message.Message, *, policy=policy.default)
Create a :class:`FeedParser` instance. Optional *_factory* is a no-argument
callable that will be called whenever a new message object is needed. It
defaults to the :class:`email.message.Message` class.
+ The *policy* keyword specifies a :mod:`~email.policy` object that controls a
+ number of aspects of the parser's operation. The default policy maintains
+ backward compatibility.
+
+ .. versionchanged:: 3.3 Added the *policy* keyword.
+
.. method:: feed(data)
Feed the :class:`FeedParser` some more data. *data* should be a string
@@ -94,15 +100,18 @@ Parser class API
The :class:`Parser` class, imported from the :mod:`email.parser` module,
provides an API that can be used to parse a message when the complete contents
of the message are available in a string or file. The :mod:`email.parser`
-module also provides a second class, called :class:`HeaderParser` which can be
-used if you're only interested in the headers of the message.
-:class:`HeaderParser` can be much faster in these situations, since it does not
-attempt to parse the message body, instead setting the payload to the raw body
-as a string. :class:`HeaderParser` has the same API as the :class:`Parser`
-class.
+module also provides header-only parsers, called :class:`HeaderParser` and
+:class:`BytesHeaderParser`, which can be used if you're only interested in the
+headers of the message. :class:`HeaderParser` and :class:`BytesHeaderParser`
+can be much faster in these situations, since they do not attempt to parse the
+message body, instead setting the payload to the raw body as a string. They
+have the same API as the :class:`Parser` and :class:`BytesParser` classes.
+.. versionadded:: 3.3
+ The BytesHeaderParser class.
-.. class:: Parser(_class=email.message.Message, strict=None)
+
+.. class:: Parser(_class=email.message.Message, *, policy=policy.default)
The constructor for the :class:`Parser` class takes an optional argument
*_class*. This must be a callable factory (such as a function or a class), and
@@ -110,13 +119,13 @@ class.
:class:`~email.message.Message` (see :mod:`email.message`). The factory will
be called without arguments.
- The optional *strict* flag is ignored.
+ The *policy* keyword specifies a :mod:`~email.policy` object that controls a
+ number of aspects of the parser's operation. The default policy maintains
+ backward compatibility.
- .. deprecated:: 2.4
- Because the :class:`Parser` class is a backward compatible API wrapper
- around the new-in-Python 2.4 :class:`FeedParser`, *all* parsing is
- effectively non-strict. You should simply stop passing a *strict* flag to
- the :class:`Parser` constructor.
+ .. versionchanged:: 3.3
+ Removed the *strict* argument that was deprecated in 2.4. Added the
+ *policy* keyword.
The other public :class:`Parser` methods are:
@@ -147,12 +156,18 @@ class.
Optional *headersonly* is as with the :meth:`parse` method.
-.. class:: BytesParser(_class=email.message.Message, strict=None)
+.. class:: BytesParser(_class=email.message.Message, *, policy=policy.default)
This class is exactly parallel to :class:`Parser`, but handles bytes input.
The *_class* and *strict* arguments are interpreted in the same way as for
- the :class:`Parser` constructor. *strict* is supported only to make porting
- code easier; it is deprecated.
+ the :class:`Parser` constructor.
+
+ The *policy* keyword specifies a :mod:`~email.policy` object that
+ controls a number of aspects of the parser's operation. The default
+ policy maintains backward compatibility.
+
+ .. versionchanged:: 3.3
+ Removed the *strict* argument. Added the *policy* keyword.
.. method:: parse(fp, headeronly=False)
@@ -190,39 +205,53 @@ in the top-level :mod:`email` package namespace.
.. currentmodule:: email
-.. function:: message_from_string(s, _class=email.message.Message, strict=None)
+.. function:: message_from_string(s, _class=email.message.Message, *, \
+ policy=policy.default)
Return a message object structure from a string. This is exactly equivalent to
- ``Parser().parsestr(s)``. Optional *_class* and *strict* are interpreted as
+ ``Parser().parsestr(s)``. *_class* and *policy* are interpreted as
with the :class:`Parser` class constructor.
-.. function:: message_from_bytes(s, _class=email.message.Message, strict=None)
+ .. versionchanged:: 3.3
+ Removed the *strict* argument. Added the *policy* keyword.
+
+.. function:: message_from_bytes(s, _class=email.message.Message, *, \
+ policy=policy.default)
Return a message object structure from a byte string. This is exactly
equivalent to ``BytesParser().parsebytes(s)``. Optional *_class* and
*strict* are interpreted as with the :class:`Parser` class constructor.
.. versionadded:: 3.2
+ .. versionchanged:: 3.3
+ Removed the *strict* argument. Added the *policy* keyword.
-.. function:: message_from_file(fp, _class=email.message.Message, strict=None)
+.. function:: message_from_file(fp, _class=email.message.Message, *, \
+ policy=policy.default)
Return a message object structure tree from an open :term:`file object`.
- This is exactly equivalent to ``Parser().parse(fp)``. Optional *_class*
- and *strict* are interpreted as with the :class:`Parser` class constructor.
+ This is exactly equivalent to ``Parser().parse(fp)``. *_class*
+ and *policy* are interpreted as with the :class:`Parser` class constructor.
+
+ .. versionchanged:: 3.3
+ Removed the *strict* argument. Added the *policy* keyword.
-.. function:: message_from_binary_file(fp, _class=email.message.Message, strict=None)
+.. function:: message_from_binary_file(fp, _class=email.message.Message, *, \
+ policy=policy.default)
Return a message object structure tree from an open binary :term:`file
object`. This is exactly equivalent to ``BytesParser().parse(fp)``.
- Optional *_class* and *strict* are interpreted as with the :class:`Parser`
+ *_class* and *policy* are interpreted as with the :class:`Parser`
class constructor.
.. versionadded:: 3.2
+ .. versionchanged:: 3.3
+ Removed the *strict* argument. Added the *policy* keyword.
Here's an example of how you might use this at an interactive Python prompt::
>>> import email
- >>> msg = email.message_from_string(myString)
+ >>> msg = email.message_from_string(myString) # doctest: +SKIP
Additional notes
diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst
new file mode 100644
index 0000000..31b13c1
--- /dev/null
+++ b/Doc/library/email.policy.rst
@@ -0,0 +1,497 @@
+:mod:`email.policy`: Policy Objects
+-----------------------------------
+
+.. module:: email.policy
+ :synopsis: Controlling the parsing and generating of messages
+
+.. moduleauthor:: R. David Murray <rdmurray@bitdance.com>
+.. sectionauthor:: R. David Murray <rdmurray@bitdance.com>
+
+.. versionadded:: 3.3
+
+
+The :mod:`email` package's prime focus is the handling of email messages as
+described by the various email and MIME RFCs. However, the general format of
+email messages (a block of header fields each consisting of a name followed by
+a colon followed by a value, the whole block followed by a blank line and an
+arbitrary 'body'), is a format that has found utility outside of the realm of
+email. Some of these uses conform fairly closely to the main RFCs, some do
+not. And even when working with email, there are times when it is desirable to
+break strict compliance with the RFCs.
+
+Policy objects give the email package the flexibility to handle all these
+disparate use cases.
+
+A :class:`Policy` object encapsulates a set of attributes and methods that
+control the behavior of various components of the email package during use.
+:class:`Policy` instances can be passed to various classes and methods in the
+email package to alter the default behavior. The settable values and their
+defaults are described below.
+
+There is a default policy used by all classes in the email package. This
+policy is named :class:`Compat32`, with a corresponding pre-defined instance
+named :const:`compat32`. It provides for complete backward compatibility (in
+some cases, including bug compatibility) with the pre-Python3.3 version of the
+email package.
+
+The first part of this documentation covers the features of :class:`Policy`, an
+:term:`abstract base class` that defines the features that are common to all
+policy objects, including :const:`compat32`. This includes certain hook
+methods that are called internally by the email package, which a custom policy
+could override to obtain different behavior.
+
+When a :class:`~email.message.Message` object is created, it acquires a policy.
+By default this will be :const:`compat32`, but a different policy can be
+specified. If the ``Message`` is created by a :mod:`~email.parser`, a policy
+passed to the parser will be the policy used by the ``Message`` it creates. If
+the ``Message`` is created by the program, then the policy can be specified
+when it is created. When a ``Message`` is passed to a :mod:`~email.generator`,
+the generator uses the policy from the ``Message`` by default, but you can also
+pass a specific policy to the generator that will override the one stored on
+the ``Message`` object.
+
+:class:`Policy` instances are immutable, but they can be cloned, accepting the
+same keyword arguments as the class constructor and returning a new
+:class:`Policy` instance that is a copy of the original but with the specified
+attribute values changed.
+
+As an example, the following code could be used to read an email message from a
+file on disk and pass it to the system ``sendmail`` program on a Unix system::
+
+ >>> from email import message_from_binary_file
+ >>> from email.generator import BytesGenerator
+ >>> from subprocess import Popen, PIPE
+ >>> with open('mymsg.txt', 'rb') as f:
+ ... msg = message_from_binary_file(f)
+ >>> p = Popen(['sendmail', msg['To'][0].address], stdin=PIPE)
+ >>> g = BytesGenerator(p.stdin, policy=msg.policy.clone(linesep='\r\n'))
+ >>> g.flatten(msg)
+ >>> p.stdin.close()
+ >>> rc = p.wait()
+
+Here we are telling :class:`~email.generator.BytesGenerator` to use the RFC
+correct line separator characters when creating the binary string to feed into
+``sendmail's`` ``stdin``, where the default policy would use ``\n`` line
+separators.
+
+Some email package methods accept a *policy* keyword argument, allowing the
+policy to be overridden for that method. For example, the following code uses
+the :meth:`~email.message.Message.as_string` method of the *msg* object from
+the previous example and writes the message to a file using the native line
+separators for the platform on which it is running::
+
+ >>> import os
+ >>> with open('converted.txt', 'wb') as f:
+ ... f.write(msg.as_string(policy=msg.policy.clone(linesep=os.linesep)))
+
+Policy objects can also be combined using the addition operator, producing a
+policy object whose settings are a combination of the non-default values of the
+summed objects::
+
+ >>> compat_SMTP = email.policy.compat32.clone(linesep='\r\n')
+ >>> compat_strict = email.policy.compat32.clone(raise_on_defect=True)
+ >>> compat_strict_SMTP = compat_SMTP + compat_strict
+
+This operation is not commutative; that is, the order in which the objects are
+added matters. To illustrate::
+
+ >>> policy100 = compat32.clone(max_line_length=100)
+ >>> policy80 = compat32.clone(max_line_length=80)
+ >>> apolicy = policy100 + policy80
+ >>> apolicy.max_line_length
+ 80
+ >>> apolicy = policy80 + policy100
+ >>> apolicy.max_line_length
+ 100
+
+
+.. class:: Policy(**kw)
+
+ This is the :term:`abstract base class` for all policy classes. It provides
+ default implementations for a couple of trivial methods, as well as the
+ implementation of the immutability property, the :meth:`clone` method, and
+ the constructor semantics.
+
+ The constructor of a policy class can be passed various keyword arguments.
+ The arguments that may be specified are any non-method properties on this
+ class, plus any additional non-method properties on the concrete class. A
+ value specified in the constructor will override the default value for the
+ corresponding attribute.
+
+ This class defines the following properties, and thus values for the
+ following may be passed in the constructor of any policy class:
+
+ .. attribute:: max_line_length
+
+ The maximum length of any line in the serialized output, not counting the
+ end of line character(s). Default is 78, per :rfc:`5322`. A value of
+ ``0`` or :const:`None` indicates that no line wrapping should be
+ done at all.
+
+ .. attribute:: linesep
+
+ The string to be used to terminate lines in serialized output. The
+ default is ``\n`` because that's the internal end-of-line discipline used
+ by Python, though ``\r\n`` is required by the RFCs.
+
+ .. attribute:: cte_type
+
+ Controls the type of Content Transfer Encodings that may be or are
+ required to be used. The possible values are:
+
+ ======== ===============================================================
+ ``7bit`` all data must be "7 bit clean" (ASCII-only). This means that
+ where necessary data will be encoded using either
+ quoted-printable or base64 encoding.
+
+ ``8bit`` data is not constrained to be 7 bit clean. Data in headers is
+ still required to be ASCII-only and so will be encoded (see
+ 'fold_binary' below for an exception), but body parts may use
+ the ``8bit`` CTE.
+ ======== ===============================================================
+
+ A ``cte_type`` value of ``8bit`` only works with ``BytesGenerator``, not
+ ``Generator``, because strings cannot contain binary data. If a
+ ``Generator`` is operating under a policy that specifies
+ ``cte_type=8bit``, it will act as if ``cte_type`` is ``7bit``.
+
+ .. attribute:: raise_on_defect
+
+ If :const:`True`, any defects encountered will be raised as errors. If
+ :const:`False` (the default), defects will be passed to the
+ :meth:`register_defect` method.
+
+ The following :class:`Policy` method is intended to be called by code using
+ the email library to create policy instances with custom settings:
+
+ .. method:: clone(**kw)
+
+ Return a new :class:`Policy` instance whose attributes have the same
+ values as the current instance, except where those attributes are
+ given new values by the keyword arguments.
+
+ The remaining :class:`Policy` methods are called by the email package code,
+ and are not intended to be called by an application using the email package.
+ A custom policy must implement all of these methods.
+
+ .. method:: handle_defect(obj, defect)
+
+ Handle a *defect* found on *obj*. When the email package calls this
+ method, *defect* will always be a subclass of
+ :class:`~email.errors.Defect`.
+
+ The default implementation checks the :attr:`raise_on_defect` flag. If
+ it is ``True``, *defect* is raised as an exception. If it is ``False``
+ (the default), *obj* and *defect* are passed to :meth:`register_defect`.
+
+ .. method:: register_defect(obj, defect)
+
+ Register a *defect* on *obj*. In the email package, *defect* will always
+ be a subclass of :class:`~email.errors.Defect`.
+
+ The default implementation calls the ``append`` method of the ``defects``
+ attribute of *obj*. When the email package calls :meth:`handle_defect`,
+ *obj* will normally have a ``defects`` attribute that has an ``append``
+ method. Custom object types used with the email package (for example,
+ custom ``Message`` objects) should also provide such an attribute,
+ otherwise defects in parsed messages will raise unexpected errors.
+
+ .. method:: header_max_count(name)
+
+ Return the maximum allowed number of headers named *name*.
+
+ Called when a header is added to a :class:`~email.message.Message`
+ object. If the returned value is not ``0`` or ``None``, and there are
+ already a number of headers with the name *name* equal to the value
+ returned, a :exc:`ValueError` is raised.
+
+ Because the default behavior of ``Message.__setitem__`` is to append the
+ value to the list of headers, it is easy to create duplicate headers
+ without realizing it. This method allows certain headers to be limited
+ in the number of instances of that header that may be added to a
+ ``Message`` programmatically. (The limit is not observed by the parser,
+ which will faithfully produce as many headers as exist in the message
+ being parsed.)
+
+ The default implementation returns ``None`` for all header names.
+
+ .. method:: header_source_parse(sourcelines)
+
+ The email package calls this method with a list of strings, each string
+ ending with the line separation characters found in the source being
+ parsed. The first line includes the field header name and separator.
+ All whitespace in the source is preserved. The method should return the
+ ``(name, value)`` tuple that is to be stored in the ``Message`` to
+ represent the parsed header.
+
+ If an implementation wishes to retain compatibility with the existing
+ email package policies, *name* should be the case preserved name (all
+ characters up to the '``:``' separator), while *value* should be the
+ unfolded value (all line separator characters removed, but whitespace
+ kept intact), stripped of leading whitespace.
+
+ *sourcelines* may contain surrogateescaped binary data.
+
+ There is no default implementation.
+
+ .. method:: header_store_parse(name, value)
+
+ The email package calls this method with the name and value provided by
+ the application program when the application program is modifying a
+ ``Message`` programmatically (as opposed to a ``Message`` created by a
+ parser). The method should return the ``(name, value)`` tuple that is to
+ be stored in the ``Message`` to represent the header.
+
+ If an implementation wishes to retain compatibility with the existing
+ email package policies, the *name* and *value* should be strings or
+ string subclasses that do not change the content of the passed in
+ arguments.
+
+ There is no default implementation.
+
+ .. method:: header_fetch_parse(name, value)
+
+ The email package calls this method with the *name* and *value* currently
+ stored in the ``Message`` when that header is requested by the
+ application program, and whatever the method returns is what is passed
+ back to the application as the value of the header being retrieved.
+ Note that there may be more than one header with the same name stored in
+ the ``Message``; the method is passed the specific name and value of the
+ header destined to be returned to the application.
+
+ *value* may contain surrogateescaped binary data. There should be no
+ surrogateescaped binary data in the value returned by the method.
+
+ There is no default implementation.
+
+ .. method:: fold(name, value)
+
+ The email package calls this method with the *name* and *value* currently
+ stored in the ``Message`` for a given header. The method should return a
+ string that represents that header "folded" correctly (according to the
+ policy settings) by composing the *name* with the *value* and inserting
+ :attr:`linesep` characters at the appropriate places. See :rfc:`5322`
+ for a discussion of the rules for folding email headers.
+
+ *value* may contain surrogateescaped binary data. There should be no
+ surrogateescaped binary data in the string returned by the method.
+
+ .. method:: fold_binary(name, value)
+
+ The same as :meth:`fold`, except that the returned value should be a
+ bytes object rather than a string.
+
+ *value* may contain surrogateescaped binary data. These could be
+ converted back into binary data in the returned bytes object.
+
+
+.. class:: Compat32(**kw)
+
+ This concrete :class:`Policy` is the backward compatibility policy. It
+ replicates the behavior of the email package in Python 3.2. The
+ :mod:`policy` module also defines an instance of this class,
+ :const:`compat32`, that is used as the default policy. Thus the default
+ behavior of the email package is to maintain compatibility with Python 3.2.
+
+ The class provides the following concrete implementations of the
+ abstract methods of :class:`Policy`:
+
+ .. method:: header_source_parse(sourcelines)
+
+ The name is parsed as everything up to the '``:``' and returned
+ unmodified. The value is determined by stripping leading whitespace off
+ the remainder of the first line, joining all subsequent lines together,
+ and stripping any trailing carriage return or linefeed characters.
+
+ .. method:: header_store_parse(name, value)
+
+ The name and value are returned unmodified.
+
+ .. method:: header_fetch_parse(name, value)
+
+ If the value contains binary data, it is converted into a
+ :class:`~email.header.Header` object using the ``unknown-8bit`` charset.
+ Otherwise it is returned unmodified.
+
+ .. method:: fold(name, value)
+
+ Headers are folded using the :class:`~email.header.Header` folding
+ algorithm, which preserves existing line breaks in the value, and wraps
+ each resulting line to the ``max_line_length``. Non-ASCII binary data are
+ CTE encoded using the ``unknown-8bit`` charset.
+
+ .. method:: fold_binary(name, value)
+
+ Headers are folded using the :class:`~email.header.Header` folding
+ algorithm, which preserves existing line breaks in the value, and wraps
+ each resulting line to the ``max_line_length``. If ``cte_type`` is
+ ``7bit``, non-ascii binary data is CTE encoded using the ``unknown-8bit``
+ charset. Otherwise the original source header is used, with its existing
+ line breaks and any (RFC invalid) binary data it may contain.
+
+
+.. note::
+
+ The documentation below describes new policies that are included in the
+ standard library on a :term:`provisional basis <provisional package>`.
+ Backwards incompatible changes (up to and including removal of the feature)
+ may occur if deemed necessary by the core developers.
+
+
+.. class:: EmailPolicy(**kw)
+
+ This concrete :class:`Policy` provides behavior that is intended to be fully
+ compliant with the current email RFCs. These include (but are not limited
+ to) :rfc:`5322`, :rfc:`2047`, and the current MIME RFCs.
+
+ This policy adds new header parsing and folding algorithms. Instead of
+ simple strings, headers are custom objects with custom attributes depending
+ on the type of the field. The parsing and folding algorithms fully implement
+ :rfc:`2047` and :rfc:`5322`.
+
+ In addition to the settable attributes listed above that apply to all
+ policies, this policy adds the following additional attributes:
+
+ .. attribute:: refold_source
+
+ If the value for a header in the ``Message`` object originated from a
+ :mod:`~email.parser` (as opposed to being set by a program), this
+ attribute indicates whether or not a generator should refold that value
+ when transforming the message back into stream form. The possible values
+ are:
+
+ ======== ===============================================================
+ ``none`` all source values use original folding
+
+ ``long`` source values that have any line that is longer than
+ ``max_line_length`` will be refolded
+
+ ``all`` all values are refolded.
+ ======== ===============================================================
+
+ The default is ``long``.
+
+ .. attribute:: header_factory
+
+ A callable that takes two arguments, ``name`` and ``value``, where
+ ``name`` is a header field name and ``value`` is an unfolded header field
+ value, and returns a string subclass that represents that header. A
+ default ``header_factory`` (see :mod:`~email.headerregistry`) is provided
+ that understands some of the :RFC:`5322` header field types. (Currently
+ address fields and date fields have special treatment, while all other
+ fields are treated as unstructured. This list will be completed before
+ the extension is marked stable.)
+
+ The class provides the following concrete implementations of the abstract
+ methods of :class:`Policy`:
+
+ .. method:: header_max_count(name)
+
+ Returns the value of the
+ :attr:`~email.headerregistry.BaseHeader.max_count` attribute of the
+ specialized class used to represent the header with the given name.
+
+ .. method:: header_source_parse(sourcelines)
+
+ The implementation of this method is the same as that for the
+ :class:`Compat32` policy.
+
+ .. method:: header_store_parse(name, value)
+
+ The name is returned unchanged. If the input value has a ``name``
+ attribute and it matches *name* ignoring case, the value is returned
+ unchanged. Otherwise the *name* and *value* are passed to
+ ``header_factory``, and the resulting custom header object is returned as
+ the value. In this case a ``ValueError`` is raised if the input value
+ contains CR or LF characters.
+
+ .. method:: header_fetch_parse(name, value)
+
+ If the value has a ``name`` attribute, it is returned unmodified.
+ Otherwise the *name*, and the *value* with any CR or LF characters
+ removed, are passed to the ``header_factory``, and the resulting custom
+ header object is returned. Any surrogateescaped bytes get turned into
+ the unicode unknown-character glyph.
+
+ .. method:: fold(name, value)
+
+ Header folding is controlled by the :attr:`refold_source` policy setting.
+ A value is considered to be a 'source value' if and only if it does not
+ have a ``name`` attribute (having a ``name`` attribute means it is a
+ header object of some sort). If a source value needs to be refolded
+ according to the policy, it is converted into a custom header object by
+ passing the *name* and the *value* with any CR and LF characters removed
+ to the ``header_factory``. Folding of a custom header object is done by
+ calling its ``fold`` method with the current policy.
+
+ Source values are split into lines using :meth:`~str.splitlines`. If
+ the value is not to be refolded, the lines are rejoined using the
+ ``linesep`` from the policy and returned. The exception is lines
+ containing non-ascii binary data. In that case the value is refolded
+ regardless of the ``refold_source`` setting, which causes the binary data
+ to be CTE encoded using the ``unknown-8bit`` charset.
+
+ .. method:: fold_binary(name, value)
+
+ The same as :meth:`fold` if :attr:`cte_type` is ``7bit``, except that
+ the returned value is bytes.
+
+ If :attr:`cte_type` is ``8bit``, non-ASCII binary data is converted back
+ into bytes. Headers with binary data are not refolded, regardless of the
+ ``refold_source`` setting, since there is no way to know whether the
+ binary data consists of single byte characters or multibyte characters.
+
+The following instances of :class:`EmailPolicy` provide defaults suitable for
+specific application domains. Note that in the future the behavior of these
+instances (in particular the ``HTTP`` instance) may be adjusted to conform even
+more closely to the RFCs relevant to their domains.
+
+.. data:: default
+
+ An instance of ``EmailPolicy`` with all defaults unchanged. This policy
+ uses the standard Python ``\n`` line endings rather than the RFC-correct
+ ``\r\n``.
+
+.. data:: SMTP
+
+ Suitable for serializing messages in conformance with the email RFCs.
+ Like ``default``, but with ``linesep`` set to ``\r\n``, which is RFC
+ compliant.
+
+.. data:: HTTP
+
+ Suitable for serializing headers for use in HTTP traffic. Like
+ ``SMTP`` except that ``max_line_length`` is set to ``None`` (unlimited).
+
+.. data:: strict
+
+ Convenience instance. The same as ``default`` except that
+ ``raise_on_defect`` is set to ``True``. This allows any policy to be made
+ strict by writing::
+
+ somepolicy + policy.strict
+
+With all of these :class:`EmailPolicies <.EmailPolicy>`, the effective API of
+the email package is changed from the Python 3.2 API in the following ways:
+
+ * Setting a header on a :class:`~email.message.Message` results in that
+ header being parsed and a custom header object created.
+
+ * Fetching a header value from a :class:`~email.message.Message` results
+ in that header being parsed and a custom header object created and
+ returned.
+
+ * Any custom header object, or any header that is refolded due to the
+ policy settings, is folded using an algorithm that fully implements the
+ RFC folding algorithms, including knowing where encoded words are required
+ and allowed.
+
+From the application view, this means that any header obtained through the
+:class:`~email.message.Message` is a custom header object with custom
+attributes, whose string value is the fully decoded unicode value of the
+header. Likewise, a header may be assigned a new value, or a new header
+created, using a unicode string, and the policy will take care of converting
+the unicode string into the correct RFC encoded form.
+
+The custom header objects and their attributes are described in
+:mod:`~email.headerregistry`.
diff --git a/Doc/library/email.rst b/Doc/library/email.rst
index 4530b95..ef5354f 100644
--- a/Doc/library/email.rst
+++ b/Doc/library/email.rst
@@ -51,6 +51,8 @@ Contents of the :mod:`email` package documentation:
email.message.rst
email.parser.rst
email.generator.rst
+ email.policy.rst
+ email.headerregistry.rst
email.mime.rst
email.header.rst
email.charset.rst
diff --git a/Doc/library/email.util.rst b/Doc/library/email.util.rst
index 11bf3b2..bad0b24 100644
--- a/Doc/library/email.util.rst
+++ b/Doc/library/email.util.rst
@@ -29,13 +29,21 @@ There are several useful utilities provided in the :mod:`email.utils` module:
fails, in which case a 2-tuple of ``('', '')`` is returned.
-.. function:: formataddr(pair)
+.. function:: formataddr(pair, charset='utf-8')
The inverse of :meth:`parseaddr`, this takes a 2-tuple of the form ``(realname,
email_address)`` and returns the string value suitable for a :mailheader:`To` or
:mailheader:`Cc` header. If the first element of *pair* is false, then the
second element is returned unmodified.
+ Optional *charset* is the character set that will be used in the :rfc:`2047`
+ encoding of the ``realname`` if the ``realname`` contains non-ASCII
+ characters. Can be an instance of :class:`str` or a
+ :class:`~email.charset.Charset`. Defaults to ``utf-8``.
+
+ .. versionchanged:: 3.3
+ Added the *charset* option.
+
.. function:: getaddresses(fieldvalues)
@@ -74,6 +82,20 @@ There are several useful utilities provided in the :mod:`email.utils` module:
indexes 6, 7, and 8 of the result tuple are not usable.
+.. function:: parsedate_to_datetime(date)
+
+ The inverse of :func:`format_datetime`. Performs the same function as
+ :func:`parsedate`, but on success returns a :class:`~datetime.datetime`. If
+ the input date has a timezone of ``-0000``, the ``datetime`` will be a naive
+ ``datetime``, and if the date is conforming to the RFCs it will represent a
+ time in UTC but with no indication of the actual source timezone of the
+ message the date comes from. If the input date has any other valid timezone
+ offset, the ``datetime`` will be an aware ``datetime`` with the
+ corresponding :class:`~datetime.timezone` :class:`~datetime.tzinfo`.
+
+ .. versionadded:: 3.3
+
+
.. function:: mktime_tz(tuple)
Turn a 10-tuple as returned by :func:`parsedate_tz` into a UTC timestamp. It
@@ -105,6 +127,36 @@ There are several useful utilities provided in the :mod:`email.utils` module:
``False``. The default is ``False``.
+.. function:: format_datetime(dt, usegmt=False)
+
+ Like ``formatdate``, but the input is a :class:`~datetime.datetime` instance. If it is
+ a naive datetime, it is assumed to be "UTC with no information about the
+ source timezone", and the conventional ``-0000`` is used for the timezone.
+ If it is an aware ``datetime``, then the numeric timezone offset is used.
+ If it is an aware timezone with offset zero, then *usegmt* may be set to
+ ``True``, in which case the string ``GMT`` is used instead of the numeric
+ timezone offset. This provides a way to generate standards conformant HTTP
+ date headers.
+
+ .. versionadded:: 3.3
+
+
+.. function:: localtime(dt=None, isdst=-1)
+
+ Return local time as an aware datetime object. If called without
+ arguments, return current time. Otherwise *dt* argument should be a
+ :class:`~datetime.datetime` instance, and it is converted to the local time
+ zone according to the system time zone database. If *dt* is naive (that
+ is, ``dt.tzinfo`` is ``None``), it is assumed to be in local time. In this
+ case, a positive or zero value for *isdst* causes ``localtime`` to presume
+ initially that summer time (for example, Daylight Saving Time) is or is not
+ (respectively) in effect for the specified time. A negative value for
+ *isdst* causes the ``localtime`` to attempt to divine whether summer time
+ is in effect for the specified time.
+
+ .. versionadded:: 3.3
+
+
.. function:: make_msgid(idstring=None, domain=None)
Returns a string suitable for an :rfc:`2822`\ -compliant
@@ -115,7 +167,8 @@ There are several useful utilities provided in the :mod:`email.utils` module:
may be useful certain cases, such as a constructing distributed system that
uses a consistent domain name across multiple hosts.
- .. versionchanged:: 3.2 domain keyword added
+ .. versionchanged:: 3.2
+ Added the *domain* keyword.
.. function:: decode_rfc2231(s)
diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst
index 7d622c2..ac02215 100644
--- a/Doc/library/exceptions.rst
+++ b/Doc/library/exceptions.rst
@@ -34,6 +34,27 @@ programmers are encouraged to at least derive new exceptions from the
defining exceptions is available in the Python Tutorial under
:ref:`tut-userexceptions`.
+When raising (or re-raising) an exception in an :keyword:`except` clause
+:attr:`__context__` is automatically set to the last exception caught; if the
+new exception is not handled the traceback that is eventually displayed will
+include the originating exception(s) and the final exception.
+
+This implicit exception chain can be made explicit by using :keyword:`from` with
+:keyword:`raise`. The single argument to :keyword:`from` must be an exception
+or ``None``. It will be set as :attr:`__cause__` on the raised exception.
+Setting :attr:`__cause__` implicitly sets :attr:`__suppress_context__` to
+``True``. If :attr:`__cause__` is an exception, it will be displayed. If
+:attr:`__cause__` is present or :attr:`__suppress_context__` has a true value,
+:attr:`__context__` will not be displayed.
+
+In either case, the default exception handling code will not display any of the
+remaining links in the :attr:`__context__` chain if :attr:`__cause__` has been
+set.
+
+
+Base classes
+------------
+
The following exceptions are used mostly as base classes for other exceptions.
.. exception:: BaseException
@@ -90,27 +111,8 @@ The following exceptions are used mostly as base classes for other exceptions.
can be raised directly by :func:`codecs.lookup`.
-.. exception:: EnvironmentError
-
- The base class for exceptions that can occur outside the Python system:
- :exc:`IOError`, :exc:`OSError`. When exceptions of this type are created with a
- 2-tuple, the first item is available on the instance's :attr:`errno` attribute
- (it is assumed to be an error number), and the second item is available on the
- :attr:`strerror` attribute (it is usually the associated error message). The
- tuple itself is also available on the :attr:`args` attribute.
-
- When an :exc:`EnvironmentError` exception is instantiated with a 3-tuple, the
- first two items are available as above, while the third item is available on the
- :attr:`filename` attribute. However, for backwards compatibility, the
- :attr:`args` attribute contains only a 2-tuple of the first two constructor
- arguments.
-
- The :attr:`filename` attribute is ``None`` when this exception is created with
- other than 3 arguments. The :attr:`errno` and :attr:`strerror` attributes are
- also ``None`` when the instance was created with other than 2 or 3 arguments.
- In this last case, :attr:`args` contains the verbatim constructor arguments as a
- tuple.
-
+Concrete exceptions
+-------------------
The following exceptions are the exceptions that are usually raised.
@@ -151,21 +153,19 @@ The following exceptions are the exceptions that are usually raised.
it is technically not an error.
-.. exception:: IOError
-
- Raised when an I/O operation (such as the built-in :func:`print` or
- :func:`open` functions or a method of a :term:`file object`) fails for an
- I/O-related reason, e.g., "file not found" or "disk full".
-
- This class is derived from :exc:`EnvironmentError`. See the discussion above
- for more information on exception instance attributes.
-
-
.. exception:: ImportError
Raised when an :keyword:`import` statement fails to find the module definition
or when a ``from ... import`` fails to find a name that is to be imported.
+ The :attr:`name` and :attr:`path` attributes can be set using keyword-only
+ arguments to the constructor. When set they represent the name of the module
+ that was attempted to be imported and the path to any file which triggered
+ the exception, respectively.
+
+ .. versionchanged:: 3.3
+ Added the :attr:`name` and :attr:`path` attributes.
+
.. exception:: IndexError
@@ -221,17 +221,30 @@ The following exceptions are the exceptions that are usually raised.
.. index:: module: errno
- This exception is derived from :exc:`EnvironmentError`. It is raised when a
- function returns a system-related error (not for illegal argument types or
- other incidental errors). The :attr:`errno` attribute is a numeric error
- code from :c:data:`errno`, and the :attr:`strerror` attribute is the
- corresponding string, as would be printed by the C function :c:func:`perror`.
- See the module :mod:`errno`, which contains names for the error codes defined
- by the underlying operating system.
+ This exception is raised when a system function returns a system-related
+ error, including I/O failures such as "file not found" or "disk full"
+ (not for illegal argument types or other incidental errors). Often a
+ subclass of :exc:`OSError` will actually be raised as described in
+ `OS exceptions`_ below. The :attr:`errno` attribute is a numeric error
+ code from the C variable :c:data:`errno`.
+
+ Under Windows, the :attr:`winerror` attribute gives you the native
+ Windows error code. The :attr:`errno` attribute is then an approximate
+ translation, in POSIX terms, of that native error code.
+
+ Under all platforms, the :attr:`strerror` attribute is the corresponding
+ error message as provided by the operating system (as formatted by the C
+ functions :c:func:`perror` under POSIX, and :c:func:`FormatMessage`
+ under Windows).
- For exceptions that involve a file system path (such as :func:`chdir` or
- :func:`unlink`), the exception instance will contain a third attribute,
- :attr:`filename`, which is the file name passed to the function.
+ For exceptions that involve a file system path (such as :func:`open` or
+ :func:`os.unlink`), the exception instance will contain an additional
+ attribute, :attr:`filename`, which is the file name passed to the function.
+
+ .. versionchanged:: 3.3
+ :exc:`EnvironmentError`, :exc:`IOError`, :exc:`WindowsError`,
+ :exc:`VMSError`, :exc:`socket.error`, :exc:`select.error` and
+ :exc:`mmap.error` have been merged into :exc:`OSError`.
.. exception:: OverflowError
@@ -262,8 +275,20 @@ The following exceptions are the exceptions that are usually raised.
.. exception:: StopIteration
Raised by built-in function :func:`next` and an :term:`iterator`\'s
- :meth:`~iterator.__next__` method to signal that there are no further values.
+ :meth:`~iterator.__next__` method to signal that there are no further
+ items produced by the iterator.
+
+ The exception object has a single attribute :attr:`value`, which is
+ given as an argument when constructing the exception, and defaults
+ to :const:`None`.
+
+ When a generator function returns, a new :exc:`StopIteration` instance is
+ raised, and the value returned by the function is used as the
+ :attr:`value` parameter to the constructor of the exception.
+ .. versionchanged:: 3.3
+ Added ``value`` attribute and the ability for generator functions to
+ use it to return a value.
.. exception:: SyntaxError
@@ -372,27 +397,141 @@ The following exceptions are the exceptions that are usually raised.
more precise exception such as :exc:`IndexError`.
-.. exception:: VMSError
+.. exception:: ZeroDivisionError
- Only available on VMS. Raised when a VMS-specific error occurs.
+ Raised when the second argument of a division or modulo operation is zero. The
+ associated value is a string indicating the type of the operands and the
+ operation.
+The following exceptions are kept for compatibility with previous versions;
+starting from Python 3.3, they are aliases of :exc:`OSError`.
+
+.. exception:: EnvironmentError
+
+.. exception:: IOError
+
+.. exception:: VMSError
+
+ Only available on VMS.
+
.. exception:: WindowsError
- Raised when a Windows-specific error occurs or when the error number does not
- correspond to an :c:data:`errno` value. The :attr:`winerror` and
- :attr:`strerror` values are created from the return values of the
- :c:func:`GetLastError` and :c:func:`FormatMessage` functions from the Windows
- Platform API. The :attr:`errno` value maps the :attr:`winerror` value to
- corresponding ``errno.h`` values. This is a subclass of :exc:`OSError`.
+ Only available on Windows.
-.. exception:: ZeroDivisionError
+OS exceptions
+^^^^^^^^^^^^^
+
+The following exceptions are subclasses of :exc:`OSError`; they get raised
+depending on the system error code.
+
+.. exception:: BlockingIOError
+
+ Raised when an operation would block on an object (e.g. socket) set
+ for non-blocking operation.
+ Corresponds to :c:data:`errno` ``EAGAIN``, ``EALREADY``,
+ ``EWOULDBLOCK`` and ``EINPROGRESS``.
+
+ In addition to those of :exc:`OSError`, :exc:`BlockingIOError` can have
+ one more attribute:
+
+ .. attribute:: characters_written
+
+ An integer containing the number of characters written to the stream
+ before it blocked. This attribute is available when using the
+ buffered I/O classes from the :mod:`io` module.
+
+.. exception:: ChildProcessError
+
+ Raised when an operation on a child process failed.
+ Corresponds to :c:data:`errno` ``ECHILD``.
+
+.. exception:: ConnectionError
+
+ A base class for connection-related issues. Subclasses are
+ :exc:`BrokenPipeError`, :exc:`ConnectionAbortedError`,
+ :exc:`ConnectionRefusedError` and :exc:`ConnectionResetError`.
+
+ .. exception:: BrokenPipeError
+
+ A subclass of :exc:`ConnectionError`, raised when trying to write on a
+ pipe while the other end has been closed, or trying to write on a socket
+ which has been shutdown for writing.
+ Corresponds to :c:data:`errno` ``EPIPE`` and ``ESHUTDOWN``.
+
+ .. exception:: ConnectionAbortedError
+
+ A subclass of :exc:`ConnectionError`, raised when a connection attempt
+ is aborted by the peer.
+ Corresponds to :c:data:`errno` ``ECONNABORTED``.
+
+ .. exception:: ConnectionRefusedError
+
+ A subclass of :exc:`ConnectionError`, raised when a connection attempt
+ is refused by the peer.
+ Corresponds to :c:data:`errno` ``ECONNREFUSED``.
+
+ .. exception:: ConnectionResetError
+
+ A subclass of :exc:`ConnectionError`, raised when a connection is
+ reset by the peer.
+ Corresponds to :c:data:`errno` ``ECONNRESET``.
+
+.. exception:: FileExistsError
+
+ Raised when trying to create a file or directory which already exists.
+ Corresponds to :c:data:`errno` ``EEXIST``.
+
+.. exception:: FileNotFoundError
+
+ Raised when a file or directory is requested but doesn't exist.
+ Corresponds to :c:data:`errno` ``ENOENT``.
+
+.. exception:: InterruptedError
+
+ Raised when a system call is interrupted by an incoming signal.
+ Corresponds to :c:data:`errno` ``EINTR``.
+
+.. exception:: IsADirectoryError
+
+ Raised when a file operation (such as :func:`os.remove`) is requested
+ on a directory.
+ Corresponds to :c:data:`errno` ``EISDIR``.
+
+.. exception:: NotADirectoryError
+
+ Raised when a directory operation (such as :func:`os.listdir`) is requested
+ on something which is not a directory.
+ Corresponds to :c:data:`errno` ``ENOTDIR``.
+
+.. exception:: PermissionError
+
+ Raised when trying to run an operation without the adequate access
+ rights - for example filesystem permissions.
+ Corresponds to :c:data:`errno` ``EACCES`` and ``EPERM``.
+
+.. exception:: ProcessLookupError
+
+ Raised when a given process doesn't exist.
+ Corresponds to :c:data:`errno` ``ESRCH``.
+
+.. exception:: TimeoutError
+
+ Raised when a system function timed out at the system level.
+ Corresponds to :c:data:`errno` ``ETIMEDOUT``.
+
+.. versionadded:: 3.3
+ All the above :exc:`OSError` subclasses were added.
+
+
+.. seealso::
+
+ :pep:`3151` - Reworking the OS and IO exception hierarchy
- Raised when the second argument of a division or modulo operation is zero. The
- associated value is a string indicating the type of the operands and the
- operation.
+Warnings
+--------
The following exceptions are used as warning categories; see the :mod:`warnings`
module for more information.
diff --git a/Doc/library/faulthandler.rst b/Doc/library/faulthandler.rst
new file mode 100644
index 0000000..3c33621
--- /dev/null
+++ b/Doc/library/faulthandler.rst
@@ -0,0 +1,136 @@
+:mod:`faulthandler` --- Dump the Python traceback
+=================================================
+
+.. module:: faulthandler
+ :synopsis: Dump the Python traceback.
+
+This module contains functions to dump Python tracebacks explicitly, on a fault,
+after a timeout, or on a user signal. Call :func:`faulthandler.enable` to
+install fault handlers for the :const:`SIGSEGV`, :const:`SIGFPE`,
+:const:`SIGABRT`, :const:`SIGBUS`, and :const:`SIGILL` signals. You can also
+enable them at startup by setting the :envvar:`PYTHONFAULTHANDLER` environment
+variable or by using the :option:`-X` ``faulthandler`` command line option.
+
+The fault handler is compatible with system fault handlers like Apport or the
+Windows fault handler. The module uses an alternative stack for signal handlers
+if the :c:func:`sigaltstack` function is available. This allows it to dump the
+traceback even on a stack overflow.
+
+The fault handler is called on catastrophic cases and therefore can only use
+signal-safe functions (e.g. it cannot allocate memory on the heap). Because of
+this limitation traceback dumping is minimal compared to normal Python
+tracebacks:
+
+* Only ASCII is supported. The ``backslashreplace`` error handler is used on
+ encoding.
+* Each string is limited to 500 characters.
+* Only the filename, the function name and the line number are
+ displayed. (no source code)
+* It is limited to 100 frames and 100 threads.
+
+By default, the Python traceback is written to :data:`sys.stderr`. To see
+tracebacks, applications must be run in the terminal. A log file can
+alternatively be passed to :func:`faulthandler.enable`.
+
+The module is implemented in C, so tracebacks can be dumped on a crash or when
+Python is deadlocked.
+
+.. versionadded:: 3.3
+
+
+Dump the traceback
+------------------
+
+.. function:: dump_traceback(file=sys.stderr, all_threads=True)
+
+ Dump the tracebacks of all threads into *file*. If *all_threads* is
+ ``False``, dump only the current thread.
+
+
+Fault handler state
+-------------------
+
+.. function:: enable(file=sys.stderr, all_threads=True)
+
+ Enable the fault handler: install handlers for the :const:`SIGSEGV`,
+ :const:`SIGFPE`, :const:`SIGABRT`, :const:`SIGBUS` and :const:`SIGILL`
+ signals to dump the Python traceback. If *all_threads* is ``True``,
+ produce tracebacks for every running thread. Otherwise, dump only the current
+ thread.
+
+.. function:: disable()
+
+ Disable the fault handler: uninstall the signal handlers installed by
+ :func:`enable`.
+
+.. function:: is_enabled()
+
+ Check if the fault handler is enabled.
+
+
+Dump the tracebacks after a timeout
+-----------------------------------
+
+.. function:: dump_traceback_later(timeout, repeat=False, file=sys.stderr, exit=False)
+
+ Dump the tracebacks of all threads, after a timeout of *timeout* seconds, or
+ every *timeout* seconds if *repeat* is ``True``. If *exit* is ``True``, call
+ :c:func:`_exit` with status=1 after dumping the tracebacks. (Note
+ :c:func:`_exit` exits the process immediately, which means it doesn't do any
+ cleanup like flushing file buffers.) If the function is called twice, the new
+ call replaces previous parameters and resets the timeout. The timer has a
+ sub-second resolution.
+
+ This function is implemented using a watchdog thread and therefore is not
+ available if Python is compiled with threads disabled.
+
+.. function:: cancel_dump_traceback_later()
+
+ Cancel the last call to :func:`dump_traceback_later`.
+
+
+Dump the traceback on a user signal
+-----------------------------------
+
+.. function:: register(signum, file=sys.stderr, all_threads=True, chain=False)
+
+ Register a user signal: install a handler for the *signum* signal to dump
+ the traceback of all threads, or of the current thread if *all_threads* is
+ ``False``, into *file*. Call the previous handler if *chain* is ``True``.
+
+ Not available on Windows.
+
+.. function:: unregister(signum)
+
+ Unregister a user signal: uninstall the handler of the *signum* signal
+ installed by :func:`register`. Return ``True`` if the signal was registered,
+ ``False`` otherwise.
+
+ Not available on Windows.
+
+
+File descriptor issue
+---------------------
+
+:func:`enable`, :func:`dump_traceback_later` and :func:`register` keep the
+file descriptor of their *file* argument. If the file is closed and its file
+descriptor is reused by a new file, or if :func:`os.dup2` is used to replace
+the file descriptor, the traceback will be written into a different file. Call
+these functions again each time that the file is replaced.
+
+
+Example
+-------
+
+Example of a segmentation fault on Linux: ::
+
+ $ python -q -X faulthandler
+ >>> import ctypes
+ >>> ctypes.string_at(0)
+ Fatal Python error: Segmentation fault
+
+ Current thread 0x00007fb899f39700:
+ File "/home/python/cpython/Lib/ctypes/__init__.py", line 486 in string_at
+ File "<stdin>", line 1 in <module>
+ Segmentation fault
+
diff --git a/Doc/library/fcntl.rst b/Doc/library/fcntl.rst
index 6192400..9a9cdc1 100644
--- a/Doc/library/fcntl.rst
+++ b/Doc/library/fcntl.rst
@@ -19,6 +19,11 @@ argument. This can be an integer file descriptor, such as returned by
``sys.stdin.fileno()``, or a :class:`io.IOBase` object, such as ``sys.stdin``
itself, which provides a :meth:`fileno` that returns a genuine file descriptor.
+.. versionchanged:: 3.3
+ Operations in this module used to raise an :exc:`IOError` where they now
+ raise an :exc:`OSError`.
+
+
The module defines the following functions:
@@ -40,7 +45,7 @@ The module defines the following functions:
larger than 1024 bytes, this is most likely to result in a segmentation
violation or a more subtle data corruption.
- If the :c:func:`fcntl` fails, an :exc:`IOError` is raised.
+ If the :c:func:`fcntl` fails, an :exc:`OSError` is raised.
.. function:: ioctl(fd, op[, arg[, mutate_flag]])
@@ -107,7 +112,7 @@ The module defines the following functions:
When *operation* is :const:`LOCK_SH` or :const:`LOCK_EX`, it can also be
bitwise ORed with :const:`LOCK_NB` to avoid blocking on lock acquisition.
If :const:`LOCK_NB` is used and the lock cannot be acquired, an
- :exc:`IOError` will be raised and the exception will have an *errno*
+ :exc:`OSError` will be raised and the exception will have an *errno*
attribute set to :const:`EACCES` or :const:`EAGAIN` (depending on the
operating system; for portability, check for both values). On at least some
systems, :const:`LOCK_EX` can only be used if the file descriptor refers to a
diff --git a/Doc/library/filecmp.rst b/Doc/library/filecmp.rst
index de20fb1..add68a3 100644
--- a/Doc/library/filecmp.rst
+++ b/Doc/library/filecmp.rst
@@ -21,11 +21,8 @@ The :mod:`filecmp` module defines the following functions:
Compare the files named *f1* and *f2*, returning ``True`` if they seem equal,
``False`` otherwise.
- Unless *shallow* is given and is false, files with identical :func:`os.stat`
- signatures are taken to be equal.
-
- Files that were compared using this function will not be compared again unless
- their :func:`os.stat` signature changes.
+ If *shallow* is true, files with identical :func:`os.stat` signatures are
+ taken to be equal. Otherwise, the contents of the files are compared.
Note that no external programs are called from this function, giving it
portability and efficiency.
@@ -51,23 +48,11 @@ The :mod:`filecmp` module defines the following functions:
one of the three returned lists.
-Example::
-
- >>> import filecmp
- >>> filecmp.cmp('undoc.rst', 'undoc.rst')
- True
- >>> filecmp.cmp('undoc.rst', 'index.rst')
- False
-
-
.. _dircmp-objects:
The :class:`dircmp` class
-------------------------
-:class:`dircmp` instances are built using this constructor:
-
-
.. class:: dircmp(a, b, ignore=None, hide=None)
Construct a new directory comparison object, to compare the directories *a* and
@@ -83,7 +68,7 @@ The :class:`dircmp` class
.. method:: report()
- Print (to ``sys.stdout``) a comparison between *a* and *b*.
+ Print (to :data:`sys.stdout`) a comparison between *a* and *b*.
.. method:: report_partial_closure()
diff --git a/Doc/library/fileinput.rst b/Doc/library/fileinput.rst
index ac44311..f8ec436 100644
--- a/Doc/library/fileinput.rst
+++ b/Doc/library/fileinput.rst
@@ -28,7 +28,10 @@ as the first argument to :func:`.input`. A single file name is also allowed.
All files are opened in text mode by default, but you can override this by
specifying the *mode* parameter in the call to :func:`.input` or
:class:`FileInput`. If an I/O error occurs during opening or reading a file,
-:exc:`IOError` is raised.
+:exc:`OSError` is raised.
+
+.. versionchanged:: 3.3
+ :exc:`IOError` used to be raised; it is now an alias of :exc:`OSError`.
If ``sys.stdin`` is used more than once, the second and further use will return
no lines, except perhaps for interactive use, or if it has been explicitly reset
@@ -168,10 +171,6 @@ and the backup file remains around; by default, the extension is ``'.bak'`` and
it is deleted when the output file is closed. In-place filtering is disabled
when standard input is read.
-.. note::
-
- The current implementation does not work for MS-DOS 8+3 filesystems.
-
The two following opening hooks are provided by this module:
diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst
index a669b1b..1419af7 100644
--- a/Doc/library/ftplib.rst
+++ b/Doc/library/ftplib.rst
@@ -40,7 +40,7 @@ Here's a sample session using the :mod:`ftplib` module::
The module defines the following items:
-.. class:: FTP(host='', user='', passwd='', acct=''[, timeout])
+.. class:: FTP(host='', user='', passwd='', acct='', timeout=None, source_address=None)
Return a new instance of the :class:`FTP` class. When *host* is given, the
method call ``connect(host)`` is made. When *user* is given, additionally
@@ -48,7 +48,8 @@ The module defines the following items:
*acct* default to the empty string when not given). The optional *timeout*
parameter specifies a timeout in seconds for blocking operations like the
connection attempt (if it is not specified, the global default timeout setting
- will be used).
+ will be used). *source_address* is a 2-tuple ``(host, port)`` for the socket
+ to bind to as its source address before connecting.
The :class:`FTP` class supports the :keyword:`with` statement. Here is an
example of how to use it:
@@ -68,8 +69,11 @@ The module defines the following items:
.. versionchanged:: 3.2
Support for the :keyword:`with` statement was added.
+ .. versionchanged:: 3.3
+ *source_address* parameter was added.
-.. class:: FTP_TLS(host='', user='', passwd='', acct='', [keyfile[, certfile[, context[, timeout]]]])
+
+.. class:: FTP_TLS(host='', user='', passwd='', acct='', keyfile=None, certfile=None, context=None, timeout=None, source_address=None)
A :class:`FTP` subclass which adds TLS support to FTP as described in
:rfc:`4217`.
@@ -80,10 +84,15 @@ The module defines the following items:
private key and certificate chain file name for the SSL connection.
*context* parameter is a :class:`ssl.SSLContext` object which allows
bundling SSL configuration options, certificates and private keys into a
- single (potentially long-lived) structure.
+ single (potentially long-lived) structure. *source_address* is a 2-tuple
+ ``(host, port)`` for the socket to bind to as its source address before
+ connecting.
.. versionadded:: 3.2
+ .. versionchanged:: 3.3
+ *source_address* parameter was added.
+
Here's a sample session using the :class:`FTP_TLS` class:
>>> from ftplib import FTP_TLS
@@ -135,8 +144,7 @@ The module defines the following items:
The set of all exceptions (as a tuple) that methods of :class:`FTP`
instances may raise as a result of problems with the FTP connection (as
opposed to programming errors made by the caller). This set includes the
- four exceptions listed above as well as :exc:`socket.error` and
- :exc:`IOError`.
+ four exceptions listed above as well as :exc:`OSError`.
.. seealso::
@@ -174,7 +182,7 @@ followed by ``lines`` for the text version or ``binary`` for the binary version.
debugging output, logging each line sent and received on the control connection.
-.. method:: FTP.connect(host='', port=0[, timeout])
+.. method:: FTP.connect(host='', port=0, timeout=None, source_address=None)
Connect to the given host and port. The default port number is ``21``, as
specified by the FTP protocol specification. It is rarely needed to specify a
@@ -182,10 +190,14 @@ followed by ``lines`` for the text version or ``binary`` for the binary version.
instance; it should not be called at all if a host was given when the instance
was created. All other methods can only be used after a connection has been
made.
-
The optional *timeout* parameter specifies a timeout in seconds for the
connection attempt. If no *timeout* is passed, the global default timeout
setting will be used.
+ *source_address* is a 2-tuple ``(host, port)`` for the socket to bind to as
+ its source address before connecting.
+
+ .. versionchanged:: 3.3
+ *source_address* parameter was added.
.. method:: FTP.getwelcome()
@@ -241,13 +253,12 @@ followed by ``lines`` for the text version or ``binary`` for the binary version.
Retrieve a file or directory listing in ASCII transfer mode. *cmd* should be
an appropriate ``RETR`` command (see :meth:`retrbinary`) or a command such as
- ``LIST``, ``NLST`` or ``MLSD`` (usually just the string ``'LIST'``).
+ ``LIST`` or ``NLST`` (usually just the string ``'LIST'``).
``LIST`` retrieves a list of files and information about those files.
- ``NLST`` retrieves a list of file names. On some servers, ``MLSD`` retrieves
- a machine readable list of files and information about those files. The
- *callback* function is called for each line with a string argument containing
- the line with the trailing CRLF stripped. The default *callback* prints the
- line to ``sys.stdout``.
+ ``NLST`` retrieves a list of file names.
+ The *callback* function is called for each line with a string argument
+ containing the line with the trailing CRLF stripped. The default *callback*
+ prints the line to ``sys.stdout``.
.. method:: FTP.set_pasv(boolean)
@@ -307,6 +318,20 @@ followed by ``lines`` for the text version or ``binary`` for the binary version.
in :meth:`transfercmd`.
+.. method:: FTP.mlsd(path="", facts=[])
+
+ List a directory in a standardized format by using the MLSD command
+ (:rfc:`3659`). If *path* is omitted, the current directory is assumed.
+ *facts* is a list of strings representing the type of information desired
+ (e.g. ``["type", "size", "perm"]``). Return a generator object yielding a
+ tuple of two elements for every file found in *path*. The first element is
+ the file name; the second is a dictionary containing facts about the file.
+ The content of this dictionary might be limited by the *facts* argument,
+ but the server is not guaranteed to return all requested facts.
+
+ .. versionadded:: 3.3
+
+
.. method:: FTP.nlst(argument[, ...])
Return a list of file names as returned by the ``NLST`` command. The
@@ -314,6 +339,8 @@ followed by ``lines`` for the text version or ``binary`` for the binary version.
directory). Multiple arguments can be used to pass non-standard options to
the ``NLST`` command.
+ .. deprecated:: 3.3 use :meth:`mlsd` instead.
+
.. method:: FTP.dir(argument[, ...])
@@ -324,6 +351,8 @@ followed by ``lines`` for the text version or ``binary`` for the binary version.
as a *callback* function as for :meth:`retrlines`; the default prints to
``sys.stdout``. This method returns ``None``.
+ .. deprecated:: 3.3 use :meth:`mlsd` instead.
+
.. method:: FTP.rename(fromname, toname)
@@ -396,6 +425,14 @@ FTP_TLS Objects
Set up a secure control connection by using TLS or SSL, depending on what is specified in the :attr:`ssl_version` attribute.
+.. method:: FTP_TLS.ccc()
+
+ Revert control channel back to plaintext. This can be useful to take
+ advantage of firewalls that know how to handle NAT with non-secure FTP
+ without opening fixed ports.
+
+ .. versionadded:: 3.3
+
.. method:: FTP_TLS.prot_p()
Set up secure data connection.
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
index b7d7e08..ae49cc2 100644
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -17,9 +17,9 @@ are always available. They are listed here in alphabetical order.
:func:`bin` :func:`eval` :func:`int` :func:`open` :func:`str`
:func:`bool` :func:`exec` :func:`isinstance` :func:`ord` :func:`sum`
:func:`bytearray` :func:`filter` :func:`issubclass` :func:`pow` :func:`super`
-:func:`bytes` :func:`float` :func:`iter` :func:`print` :func:`tuple`
+:func:`bytes` :func:`float` :func:`iter` :func:`print` |func-tuple|_
:func:`callable` :func:`format` :func:`len` :func:`property` :func:`type`
-:func:`chr` |func-frozenset|_ :func:`list` :func:`range` :func:`vars`
+:func:`chr` |func-frozenset|_ |func-list|_ |func-range|_ :func:`vars`
:func:`classmethod` :func:`getattr` :func:`locals` :func:`repr` :func:`zip`
:func:`compile` :func:`globals` :func:`map` :func:`reversed` :func:`__import__`
:func:`complex` :func:`hasattr` :func:`max` :func:`round`
@@ -33,6 +33,9 @@ are always available. They are listed here in alphabetical order.
.. |func-frozenset| replace:: ``frozenset()``
.. |func-memoryview| replace:: ``memoryview()``
.. |func-set| replace:: ``set()``
+.. |func-list| replace:: ``list()``
+.. |func-tuple| replace:: ``tuple()``
+.. |func-range| replace:: ``range()``
.. function:: abs(x)
@@ -93,6 +96,7 @@ are always available. They are listed here in alphabetical order.
.. index:: pair: Boolean; type
+.. _func-bytearray:
.. function:: bytearray([source[, encoding[, errors]]])
Return a new array of bytes. The :class:`bytearray` type is a mutable
@@ -118,7 +122,10 @@ are always available. They are listed here in alphabetical order.
Without an argument, an array of size 0 is created.
+ See also :ref:`binaryseq` and :ref:`typebytearray`.
+
+.. _func-bytes:
.. function:: bytes([source[, encoding[, errors]]])
Return a new "bytes" object, which is an immutable sequence of integers in
@@ -130,6 +137,8 @@ are always available. They are listed here in alphabetical order.
Bytes objects can also be created with literals, see :ref:`strings`.
+ See also :ref:`binaryseq`, :ref:`typebytes`, and :ref:`bytes-methods`.
+
.. function:: callable(object)
@@ -152,10 +161,6 @@ are always available. They are listed here in alphabetical order.
1,114,111 (0x10FFFF in base 16). :exc:`ValueError` will be raised if *i* is
outside that range.
- Note that on narrow Unicode builds, the result is a string of
- length two for *i* greater than 65,535 (0xFFFF in hexadecimal).
-
-
.. function:: classmethod(function)
@@ -312,17 +317,18 @@ are always available. They are listed here in alphabetical order.
>>> import struct
>>> dir() # show the names in the module namespace
- ['__builtins__', '__doc__', '__name__', 'struct']
- >>> dir(struct) # show the names in the struct module
- ['Struct', '__builtins__', '__doc__', '__file__', '__name__',
- '__package__', '_clearcache', 'calcsize', 'error', 'pack', 'pack_into',
+ ['__builtins__', '__name__', 'struct']
+ >>> dir(struct) # show the names in the struct module # doctest: +SKIP
+ ['Struct', '__all__', '__builtins__', '__cached__', '__doc__', '__file__',
+ '__initializing__', '__loader__', '__name__', '__package__',
+ '_clearcache', 'calcsize', 'error', 'pack', 'pack_into',
'unpack', 'unpack_from']
>>> class Shape(object):
- def __dir__(self):
- return ['area', 'perimeter', 'location']
+ ... def __dir__(self):
+ ... return ['area', 'perimeter', 'location']
>>> s = Shape()
>>> dir(s)
- ['area', 'perimeter', 'location']
+ ['area', 'location', 'perimeter']
.. note::
@@ -621,9 +627,9 @@ are always available. They are listed here in alphabetical order.
to a string (stripping a trailing newline), and returns that. When EOF is
read, :exc:`EOFError` is raised. Example::
- >>> s = input('--> ')
+ >>> s = input('--> ') # doctest: +SKIP
--> Monty Python's Flying Circus
- >>> s
+ >>> s # doctest: +SKIP
"Monty Python's Flying Circus"
If the :mod:`readline` module was loaded, then :func:`input` will use it
@@ -689,6 +695,8 @@ are always available. They are listed here in alphabetical order.
*sentinel*, :exc:`StopIteration` will be raised, otherwise the value will
be returned.
+ See also :ref:`typeiter`.
+
One useful application of the second form of :func:`iter` is to read lines of
a file until a certain line is reached. The following example reads a file
until the :meth:`readline` method returns an empty string::
@@ -704,16 +712,12 @@ are always available. They are listed here in alphabetical order.
sequence (string, tuple or list) or a mapping (dictionary).
+.. _func-list:
.. function:: list([iterable])
+ :noindex:
- Return a list whose items are the same and in the same order as *iterable*'s
- items. *iterable* may be either a sequence, a container that supports
- iteration, or an iterator object. If *iterable* is already a list, a copy is
- made and returned, similar to ``iterable[:]``. For instance, ``list('abc')``
- returns ``['a', 'b', 'c']`` and ``list( (1, 2, 3) )`` returns ``[1, 2, 3]``.
- If no argument is given, returns a new empty list, ``[]``.
-
- :class:`list` is a mutable sequence type, as documented in :ref:`typesseq`.
+ Rather than being a function, :class:`list` is actually a mutable
+ sequence type, as documented in :ref:`typesseq-list` and :ref:`typesseq`.
.. function:: locals()
@@ -812,10 +816,10 @@ are always available. They are listed here in alphabetical order.
.. index::
single: file object; open() built-in function
-.. function:: open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True)
+.. function:: open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)
Open *file* and return a corresponding :term:`file object`. If the file
- cannot be opened, an :exc:`IOError` is raised.
+ cannot be opened, an :exc:`OSError` is raised.
*file* is either a string or bytes object giving the pathname (absolute or
relative to the current working directory) of the file to be opened or
@@ -826,17 +830,20 @@ are always available. They are listed here in alphabetical order.
*mode* is an optional string that specifies the mode in which the file is
opened. It defaults to ``'r'`` which means open for reading in text mode.
Other common values are ``'w'`` for writing (truncating the file if it
- already exists), and ``'a'`` for appending (which on *some* Unix systems,
- means that *all* writes append to the end of the file regardless of the
- current seek position). In text mode, if *encoding* is not specified the
- encoding used is platform dependent. (For reading and writing raw bytes use
- binary mode and leave *encoding* unspecified.) The available modes are:
+ already exists), ``'x'`` for exclusive creation and ``'a'`` for appending
+ (which on *some* Unix systems, means that *all* writes append to the end of
+ the file regardless of the current seek position). In text mode, if
+ *encoding* is not specified the encoding used is platform dependent:
+ ``locale.getpreferredencoding(False)`` is called to get the current locale
+ encoding. (For reading and writing raw bytes use binary mode and leave
+ *encoding* unspecified.) The available modes are:
========= ===============================================================
Character Meaning
--------- ---------------------------------------------------------------
``'r'`` open for reading (default)
``'w'`` open for writing, truncating the file first
+ ``'x'`` open for exclusive creation, failing if the file already exists
``'a'`` open for writing, appending to the end of the file if it exists
``'b'`` binary mode
``'t'`` text mode (default)
@@ -922,6 +929,16 @@ are always available. They are listed here in alphabetical order.
closed. If a filename is given *closefd* has no effect and must be ``True``
(the default).
+ A custom opener can be used by passing a callable as *opener*. The underlying
+ file descriptor for the file object is then obtained by calling *opener* with
+ (*file*, *flags*). *opener* must return an open file descriptor (passing
+ :func:`os.open` as *opener* results in functionality similar to passing
+ ``None``).
+
+ .. versionchanged:: 3.3
+ The *opener* parameter was added.
+ The ``'x'`` mode was added.
+
The type of :term:`file object` returned by the :func:`open` function
depends on the mode. When :func:`open` is used to open a file in a text
mode (``'w'``, ``'r'``, ``'wt'``, ``'rt'``, etc.), it returns a subclass of
@@ -947,6 +964,11 @@ are always available. They are listed here in alphabetical order.
(where :func:`open` is declared), :mod:`os`, :mod:`os.path`, :mod:`tempfile`,
and :mod:`shutil`.
+ .. versionchanged:: 3.3
+ :exc:`IOError` used to be raised; it is now an alias of :exc:`OSError`.
+ :exc:`FileExistsError` is now raised if the file opened in exclusive
+ creation mode (``'x'``) already exists.
+
.. XXX works for bytes too, but should it?
.. function:: ord(c)
@@ -956,9 +978,6 @@ are always available. They are listed here in alphabetical order.
point of that character. For example, ``ord('a')`` returns the integer ``97``
and ``ord('\u2020')`` returns ``8224``. This is the inverse of :func:`chr`.
- On wide Unicode builds, if the argument length is not one, a
- :exc:`TypeError` will be raised. On narrow Unicode builds, strings
- of length two are accepted when they form a UTF-16 surrogate pair.
.. function:: pow(x, y[, z])
@@ -976,7 +995,7 @@ are always available. They are listed here in alphabetical order.
must be of integer types, and *y* must be non-negative.
-.. function:: print(*objects, sep=' ', end='\\n', file=sys.stdout)
+.. function:: print(*objects, sep=' ', end='\\n', file=sys.stdout, flush=False)
Print *objects* to the stream *file*, separated by *sep* and followed by
*end*. *sep*, *end* and *file*, if present, must be given as keyword
@@ -989,9 +1008,12 @@ are always available. They are listed here in alphabetical order.
*end*.
The *file* argument must be an object with a ``write(string)`` method; if it
- is not present or ``None``, :data:`sys.stdout` will be used. Output buffering
- is determined by *file*. Use ``file.flush()`` to ensure, for instance,
- immediate appearance on a screen.
+ is not present or ``None``, :data:`sys.stdout` will be used. Whether output
+ is buffered is usually determined by *file*, but if the *flush* keyword
+ argument is true, the stream is forcibly flushed.
+
+ .. versionchanged:: 3.3
+ Added the *flush* keyword argument.
.. function:: property(fget=None, fset=None, fdel=None, doc=None)
@@ -1063,63 +1085,13 @@ are always available. They are listed here in alphabetical order.
``fdel`` corresponding to the constructor arguments.
-.. XXX does accept objects with __index__ too
+.. _func-range:
.. function:: range(stop)
range(start, stop[, step])
+ :noindex:
- This is a versatile function to create iterables yielding arithmetic
- progressions. It is most often used in :keyword:`for` loops. The arguments
- must be integers. If the *step* argument is omitted, it defaults to ``1``.
- If the *start* argument is omitted, it defaults to ``0``. The full form
- returns an iterable of integers ``[start, start + step, start + 2 * step,
- ...]``. If *step* is positive, the last element is the largest ``start + i *
- step`` less than *stop*; if *step* is negative, the last element is the
- smallest ``start + i * step`` greater than *stop*. *step* must not be zero
- (or else :exc:`ValueError` is raised). Example:
-
- >>> list(range(10))
- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
- >>> list(range(1, 11))
- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- >>> list(range(0, 30, 5))
- [0, 5, 10, 15, 20, 25]
- >>> list(range(0, 10, 3))
- [0, 3, 6, 9]
- >>> list(range(0, -10, -1))
- [0, -1, -2, -3, -4, -5, -6, -7, -8, -9]
- >>> list(range(0))
- []
- >>> list(range(1, 0))
- []
-
- Range objects implement the :class:`collections.Sequence` ABC, and provide
- features such as containment tests, element index lookup, slicing and
- support for negative indices (see :ref:`typesseq`):
-
- >>> r = range(0, 20, 2)
- >>> r
- range(0, 20, 2)
- >>> 11 in r
- False
- >>> 10 in r
- True
- >>> r.index(10)
- 5
- >>> r[5]
- 10
- >>> r[:5]
- range(0, 10, 2)
- >>> r[-1]
- 18
-
- Ranges containing absolute values larger than :data:`sys.maxsize` are permitted
- but some features (such as :func:`len`) will raise :exc:`OverflowError`.
-
- .. versionchanged:: 3.2
- Implement the Sequence ABC.
- Support slicing and negative indices.
- Test integers for membership in constant time instead of iterating
- through all items.
+ Rather than being a function, :class:`range` is actually an immutable
+ sequence type, as documented in :ref:`typesseq-range` and :ref:`typesseq`.
.. function:: repr(object)
@@ -1240,10 +1212,12 @@ are always available. They are listed here in alphabetical order.
standard type hierarchy in :ref:`types`.
+.. _func-str:
.. function:: str(object='')
str(object[, encoding[, errors]])
- Return a string version of an object, using one of the following modes:
+ Return a :ref:`string <textseq>` version of an object, using one of the
+ following modes:
If *encoding* and/or *errors* are given, :func:`str` will decode the
*object* which can either be a byte string or a character buffer using
@@ -1266,11 +1240,9 @@ are always available. They are listed here in alphabetical order.
Objects can specify what ``str(object)`` returns by defining a :meth:`__str__`
special method.
- For more information on strings see :ref:`typesseq` which describes sequence
- functionality (strings are sequences), and also the string-specific methods
- described in the :ref:`string-methods` section. To output formatted strings,
- see the :ref:`string-formatting` section. In addition see the
- :ref:`stringservices` section.
+ For more information on strings and string methods, see the :ref:`textseq`
+ section. To output formatted strings, see the :ref:`string-formatting`
+ section. In addition, see the :ref:`stringservices` section.
.. function:: sum(iterable[, start])
@@ -1330,26 +1302,24 @@ are always available. They are listed here in alphabetical order.
Accordingly, :func:`super` is undefined for implicit lookups using statements or
operators such as ``super()[name]``.
- Also note that :func:`super` is not limited to use inside methods. The two
- argument form specifies the arguments exactly and makes the appropriate
- references. The zero argument form automatically searches the stack frame
- for the class (``__class__``) and the first argument.
+ Also note that, aside from the zero argument form, :func:`super` is not
+ limited to use inside methods. The two argument form specifies the
+ arguments exactly and makes the appropriate references. The zero
+ argument form only works inside a class definition, as the compiler fills
+ in the necessary details to correctly retrieve the class being defined,
+ as well as accessing the current instance for ordinary methods.
For practical suggestions on how to design cooperative classes using
:func:`super`, see `guide to using super()
<http://rhettinger.wordpress.com/2011/05/26/super-considered-super/>`_.
+.. _func-tuple:
.. function:: tuple([iterable])
+ :noindex:
- Return a tuple whose items are the same and in the same order as *iterable*'s
- items. *iterable* may be a sequence, a container that supports iteration, or an
- iterator object. If *iterable* is already a tuple, it is returned unchanged.
- For instance, ``tuple('abc')`` returns ``('a', 'b', 'c')`` and ``tuple([1, 2,
- 3])`` returns ``(1, 2, 3)``. If no argument is given, returns a new empty
- tuple, ``()``.
-
- :class:`tuple` is an immutable sequence type, as documented in :ref:`typesseq`.
+ Rather than being a function, :class:`tuple` is actually an immutable
+ sequence type, as documented in :ref:`typesseq-tuple` and :ref:`typesseq`.
.. function:: type(object)
@@ -1382,6 +1352,8 @@ are always available. They are listed here in alphabetical order.
...
>>> X = type('X', (object,), dict(a=1))
+ See also :ref:`bltin-type-objects`.
+
.. function:: vars([object])
@@ -1438,7 +1410,7 @@ are always available. They are listed here in alphabetical order.
True
-.. function:: __import__(name, globals={}, locals={}, fromlist=[], level=-1)
+.. function:: __import__(name, globals=None, locals=None, fromlist=(), level=0)
.. index::
statement: import
@@ -1453,8 +1425,9 @@ are always available. They are listed here in alphabetical order.
replaced (by importing the :mod:`builtins` module and assigning to
``builtins.__import__``) in order to change semantics of the
:keyword:`import` statement, but nowadays it is usually simpler to use import
- hooks (see :pep:`302`). Direct use of :func:`__import__` is rare, except in
- cases where you want to import a module whose name is only known at runtime.
+ hooks (see :pep:`302`) to attain the same goals. Direct use of
+ :func:`__import__` is entirely discouraged in favor of
+ :func:`importlib.import_module`.
The function imports the module *name*, potentially using the given *globals*
and *locals* to determine how to interpret the name in a package context.
@@ -1463,13 +1436,11 @@ are always available. They are listed here in alphabetical order.
not use its *locals* argument at all, and uses its *globals* only to
determine the package context of the :keyword:`import` statement.
- *level* specifies whether to use absolute or relative imports. ``0``
- means only perform absolute imports. Positive values for *level* indicate the
- number of parent directories to search relative to the directory of the
- module calling :func:`__import__`. Negative values attempt both an implicit
- relative import and an absolute import (usage of negative values for *level*
- are strongly discouraged as future versions of Python do not support such
- values). Import statements only use values of 0 or greater.
+ *level* specifies whether to use absolute or relative imports. ``0`` (the
+ default) means only perform absolute imports. Positive values for
+ *level* indicate the number of parent directories to search relative to the
+ directory of the module calling :func:`__import__` (see :pep:`328` for the
+ details).
When the *name* variable is of the form ``package.module``, normally, the
top-level package (the name up till the first dot) is returned, *not* the
@@ -1502,6 +1473,10 @@ are always available. They are listed here in alphabetical order.
If you simply want to import a module (potentially within a package) by name,
use :func:`importlib.import_module`.
+ .. versionchanged:: 3.3
+ Negative values for *level* are no longer supported (which also changes
+ the default value to 0).
+
.. rubric:: Footnotes
diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst
index 04743d3..f5c6608 100644
--- a/Doc/library/functools.rst
+++ b/Doc/library/functools.rst
@@ -40,7 +40,7 @@ The :mod:`functools` module defines the following functions:
.. versionadded:: 3.2
-.. decorator:: lru_cache(maxsize=100)
+.. decorator:: lru_cache(maxsize=128, typed=False)
Decorator to wrap a function with a memoizing callable that saves up to the
*maxsize* most recent calls. It can save time when an expensive or I/O bound
@@ -49,8 +49,13 @@ The :mod:`functools` module defines the following functions:
Since a dictionary is used to cache results, the positional and keyword
arguments to the function must be hashable.
- If *maxsize* is set to None, the LRU feature is disabled and the cache
- can grow without bound.
+ If *maxsize* is set to None, the LRU feature is disabled and the cache can
+ grow without bound. The LRU feature performs best when *maxsize* is a
+ power-of-two.
+
+ If *typed* is set to True, function arguments of different types will be
+ cached separately. For example, ``f(3)`` and ``f(3.0)`` will be treated
+ as distinct calls with distinct results.
To help measure the effectiveness of the cache and tune the *maxsize*
parameter, the wrapped function is instrumented with a :func:`cache_info`
@@ -67,8 +72,8 @@ The :mod:`functools` module defines the following functions:
An `LRU (least recently used) cache
<http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used>`_ works
- best when more recent calls are the best predictors of upcoming calls (for
- example, the most popular articles on a news server tend to change daily).
+ best when the most recent calls are the best predictors of upcoming calls (for
+ example, the most popular articles on a news server tend to change each day).
The cache's size limit assures that the cache does not grow without bound on
long-running processes such as web servers.
@@ -111,6 +116,9 @@ The :mod:`functools` module defines the following functions:
.. versionadded:: 3.2
+ .. versionchanged:: 3.3
+ Added the *typed* option.
+
.. decorator:: total_ordering
Given a class defining one or more rich comparison ordering methods, this
diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst
index 0281bb7..41bda1e 100644
--- a/Doc/library/gc.rst
+++ b/Doc/library/gc.rst
@@ -153,8 +153,8 @@ The :mod:`gc` module provides the following functions:
.. versionadded:: 3.1
-The following variable is provided for read-only access (you can mutate its
-value but should not rebind it):
+The following variables are provided for read-only access (you can mutate the
+values but should not rebind them):
.. data:: garbage
@@ -183,6 +183,41 @@ value but should not rebind it):
:const:`DEBUG_UNCOLLECTABLE` is set, in addition all uncollectable objects
are printed.
+.. data:: callbacks
+
+ A list of callbacks that will be invoked by the garbage collector before and
+ after collection. The callbacks will be called with two arguments,
+ *phase* and *info*.
+
+ *phase* can be one of two values:
+
+ "start": The garbage collection is about to start.
+
+ "stop": The garbage collection has finished.
+
+ *info* is a dict providing more information for the callback. The following
+ keys are currently defined:
+
+ "generation": The oldest generation being collected.
+
+ "collected": When *phase* is "stop", the number of objects
+ successfully collected.
+
+ "uncollectable": When *phase* is "stop", the number of objects
+ that could not be collected and were put in :data:`garbage`.
+
+ Applications can add their own callbacks to this list. The primary
+ use cases are:
+
+ Gathering statistics about garbage collection, such as how often
+ various generations are collected, and how long the collection
+ takes.
+
+ Allowing applications to identify and clear their own uncollectable
+ types when they appear in :data:`garbage`.
+
+ .. versionadded:: 3.3
+
The following constants are provided for use with :func:`set_debug`:
diff --git a/Doc/library/gettext.rst b/Doc/library/gettext.rst
index 0fa022c..825311b 100644
--- a/Doc/library/gettext.rst
+++ b/Doc/library/gettext.rst
@@ -185,10 +185,13 @@ class can also install themselves in the built-in namespace as the function
translation object from the cache; the actual instance data is still shared with
the cache.
- If no :file:`.mo` file is found, this function raises :exc:`IOError` if
+ If no :file:`.mo` file is found, this function raises :exc:`OSError` if
*fallback* is false (which is the default), and returns a
:class:`NullTranslations` instance if *fallback* is true.
+ .. versionchanged:: 3.3
+ :exc:`IOError` used to be raised instead of :exc:`OSError`.
+
.. function:: install(domain, localedir=None, codeset=None, names=None)
@@ -342,7 +345,7 @@ The entire set of key/value pairs are placed into a dictionary and set as the
If the :file:`.mo` file's magic number is invalid, or if other problems occur
while reading the file, instantiating a :class:`GNUTranslations` class can raise
-:exc:`IOError`.
+:exc:`OSError`.
The following methods are overridden from the base class implementation:
diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst
index 9422ea9..50d0462 100644
--- a/Doc/library/gzip.rst
+++ b/Doc/library/gzip.rst
@@ -13,9 +13,11 @@ like the GNU programs :program:`gzip` and :program:`gunzip` would.
The data compression is provided by the :mod:`zlib` module.
-The :mod:`gzip` module provides the :class:`GzipFile` class. The :class:`GzipFile`
-class reads and writes :program:`gzip`\ -format files, automatically compressing
-or decompressing the data so that it looks like an ordinary :term:`file object`.
+The :mod:`gzip` module provides the :class:`GzipFile` class, as well as the
+:func:`.open`, :func:`compress` and :func:`decompress` convenience functions.
+The :class:`GzipFile` class reads and writes :program:`gzip`\ -format files,
+automatically compressing or decompressing the data so that it looks like an
+ordinary :term:`file object`.
Note that additional file formats which can be decompressed by the
:program:`gzip` and :program:`gunzip` programs, such as those produced by
@@ -24,6 +26,34 @@ Note that additional file formats which can be decompressed by the
The module defines the following items:
+.. function:: open(filename, mode='rb', compresslevel=9, encoding=None, errors=None, newline=None)
+
+ Open a gzip-compressed file in binary or text mode, returning a :term:`file
+ object`.
+
+ The *filename* argument can be an actual filename (a :class:`str` or
+ :class:`bytes` object), or an existing file object to read from or write to.
+
+ The *mode* argument can be any of ``'r'``, ``'rb'``, ``'a'``, ``'ab'``,
+ ``'w'``, or ``'wb'`` for binary mode, or ``'rt'``, ``'at'``, or ``'wt'`` for
+ text mode. The default is ``'rb'``.
+
+ The *compresslevel* argument is an integer from 1 to 9, as for the
+ :class:`GzipFile` constructor.
+
+ For binary mode, this function is equivalent to the :class:`GzipFile`
+ constructor: ``GzipFile(filename, mode, compresslevel)``. In this case, the
+ *encoding*, *errors* and *newline* arguments must not be provided.
+
+ For text mode, a :class:`GzipFile` object is created, and wrapped in an
+ :class:`io.TextIOWrapper` instance with the specified encoding, error
+ handling behavior, and line ending(s).
+
+ .. versionchanged:: 3.3
+ Added support for *filename* being a file object, support for text mode,
+ and the *encoding*, *errors* and *newline* arguments.
+
+
.. class:: GzipFile(filename=None, mode=None, compresslevel=9, fileobj=None, mtime=None)
Constructor for the :class:`GzipFile` class, which simulates most of the
@@ -46,9 +76,9 @@ The module defines the following items:
or ``'wb'``, depending on whether the file will be read or written. The default
is the mode of *fileobj* if discernible; otherwise, the default is ``'rb'``.
- Note that the file is always opened in binary mode; text mode is not
- supported. If you need to read a compressed file in text mode, wrap your
- :class:`GzipFile` with an :class:`io.TextIOWrapper`.
+ Note that the file is always opened in binary mode. To open a compressed file
+ in text mode, use :func:`.open` (or wrap your :class:`GzipFile` with an
+ :class:`io.TextIOWrapper`).
The *compresslevel* argument is an integer from ``1`` to ``9`` controlling the
level of compression; ``1`` is fastest and produces the least compression, and
@@ -71,7 +101,7 @@ The module defines the following items:
:class:`GzipFile` supports the :class:`io.BufferedIOBase` interface,
including iteration and the :keyword:`with` statement. Only the
- :meth:`read1` and :meth:`truncate` methods aren't implemented.
+ :meth:`truncate` method isn't implemented.
:class:`GzipFile` also provides the following method:
@@ -93,12 +123,9 @@ The module defines the following items:
.. versionchanged:: 3.2
Support for unseekable files was added.
+ .. versionchanged:: 3.3
+ The :meth:`io.BufferedIOBase.read1` method is now implemented.
-.. function:: open(filename, mode='rb', compresslevel=9)
-
- This is a shorthand for ``GzipFile(filename,`` ``mode,`` ``compresslevel)``.
- The *filename* argument is required; *mode* defaults to ``'rb'`` and
- *compresslevel* defaults to ``9``.
.. function:: compress(data, compresslevel=9)
diff --git a/Doc/library/hmac.rst b/Doc/library/hmac.rst
index eff2724..0706ff4 100644
--- a/Doc/library/hmac.rst
+++ b/Doc/library/hmac.rst
@@ -38,6 +38,13 @@ An HMAC object has the following methods:
given to the constructor. It may contain non-ASCII bytes, including NUL
bytes.
+ .. warning::
+
+ When comparing the output of :meth:`digest` to an externally-supplied
+ digest during a verification routine, it is recommended to use the
+ :func:`compare_digest` function instead of the ``==`` operator
+ to reduce the vulnerability to timing attacks.
+
.. method:: HMAC.hexdigest()
@@ -45,6 +52,13 @@ An HMAC object has the following methods:
length containing only hexadecimal digits. This may be used to exchange the
value safely in email or other non-binary environments.
+ .. warning::
+
+ When comparing the output of :meth:`hexdigest` to an externally-supplied
+ digest during a verification routine, it is recommended to use the
+ :func:`compare_digest` function instead of the ``==`` operator
+ to reduce the vulnerability to timing attacks.
+
.. method:: HMAC.copy()
@@ -52,6 +66,27 @@ An HMAC object has the following methods:
compute the digests of strings that share a common initial substring.
+This module also provides the following helper function:
+
+.. function:: compare_digest(a, b)
+
+ Return ``a == b``. This function uses an approach designed to prevent
+ timing analysis by avoiding content-based short circuiting behaviour,
+ making it appropriate for cryptography. *a* and *b* must both be of the
+ same type: either :class:`str` (ASCII only, as e.g. returned by
+ :meth:`HMAC.hexdigest`), or any type that supports the buffer protocol
+ (e.g. :class:`bytes`).
+
+ .. note::
+
+ If *a* and *b* are of different lengths, or if an error occurs,
+ a timing attack could theoretically reveal information about the
+ types and lengths of *a* and *b*--but not their values.
+
+
+ .. versionadded:: 3.3
+
+
.. seealso::
Module :mod:`hashlib`
diff --git a/Doc/library/html.entities.rst b/Doc/library/html.entities.rst
index b8b4aa8..65ce817 100644
--- a/Doc/library/html.entities.rst
+++ b/Doc/library/html.entities.rst
@@ -9,11 +9,19 @@
--------------
-This module defines three dictionaries, ``name2codepoint``, ``codepoint2name``,
-and ``entitydefs``. ``entitydefs`` is used to provide the :attr:`entitydefs`
-attribute of the :class:`html.parser.HTMLParser` class. The definition provided
-here contains all the entities defined by XHTML 1.0 that can be handled using
-simple textual substitution in the Latin-1 character set (ISO-8859-1).
+This module defines four dictionaries, :data:`html5`,
+:data:`name2codepoint`, :data:`codepoint2name`, and :data:`entitydefs`.
+
+
+.. data:: html5
+
+ A dictionary that maps HTML5 named character references [#]_ to the
+ equivalent Unicode character(s), e.g. ``html5['gt;'] == '>'``.
+ Note that the trailing semicolon is included in the name (e.g. ``'gt;'``),
+ however some of the names are accepted by the standard even without the
+ semicolon: in this case the name is present with and without the ``';'``.
+
+ .. versionadded:: 3.3
.. data:: entitydefs
@@ -30,3 +38,8 @@ simple textual substitution in the Latin-1 character set (ISO-8859-1).
.. data:: codepoint2name
A dictionary that maps Unicode codepoints to HTML entity names.
+
+
+.. rubric:: Footnotes
+
+.. [#] See http://www.w3.org/TR/html5/named-character-references.html
diff --git a/Doc/library/html.parser.rst b/Doc/library/html.parser.rst
index f3c36ec..e4154ef 100644
--- a/Doc/library/html.parser.rst
+++ b/Doc/library/html.parser.rst
@@ -16,13 +16,14 @@
This module defines a class :class:`HTMLParser` which serves as the basis for
parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML.
-.. class:: HTMLParser(strict=True)
+.. class:: HTMLParser(strict=False)
- Create a parser instance. If *strict* is ``True`` (the default), invalid
- HTML results in :exc:`~html.parser.HTMLParseError` exceptions [#]_. If
- *strict* is ``False``, the parser uses heuristics to make a best guess at
- the intention of any invalid HTML it encounters, similar to the way most
- browsers do. Using ``strict=False`` is advised.
+ Create a parser instance. If *strict* is ``False`` (the default), the parser
+ will accept and parse invalid markup. If *strict* is ``True`` the parser
+ will raise an :exc:`~html.parser.HTMLParseError` exception instead [#]_ when
+ it's not able to parse the markup.
+ The use of ``strict=True`` is discouraged and the *strict* argument is
+ deprecated.
An :class:`.HTMLParser` instance is fed HTML data and calls handler methods
when start tags, end tags, text, comments, and other markup elements are
@@ -32,7 +33,12 @@ parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML.
This parser does not check that end tags match start tags or call the end-tag
handler for elements which are closed implicitly by closing an outer element.
- .. versionchanged:: 3.2 *strict* keyword added
+ .. versionchanged:: 3.2
+ *strict* keyword added.
+
+ .. deprecated-removed:: 3.3 3.5
+ The *strict* argument and the strict mode have been deprecated.
+ The parser is now able to accept and parse invalid markup too.
An exception is defined as well:
@@ -46,6 +52,10 @@ An exception is defined as well:
detected, and :attr:`offset` is the number of characters into the line at
which the construct starts.
+ .. deprecated-removed:: 3.3 3.5
+ This exception has been deprecated because it's never raised by the parser
+ (when the default non-strict mode is used).
+
Example HTML Parser Application
-------------------------------
diff --git a/Doc/library/html.rst b/Doc/library/html.rst
index 3ad1c0c..1107ca9 100644
--- a/Doc/library/html.rst
+++ b/Doc/library/html.rst
@@ -19,3 +19,10 @@ This module defines utilities to manipulate HTML.
attribute value delimited by quotes, as in ``<a href="...">``.
.. versionadded:: 3.2
+
+--------------
+
+Submodules in the ``html`` package are:
+
+* :mod:`html.parser` -- HTML/XHTML parser with lenient parsing mode
+* :mod:`html.entities` -- HTML entity definitions
diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst
index d439f24..5599dac 100644
--- a/Doc/library/http.client.rst
+++ b/Doc/library/http.client.rst
@@ -343,6 +343,15 @@ and also the following constants for integer status codes:
| :const:`UPGRADE_REQUIRED` | ``426`` | HTTP Upgrade to TLS, |
| | | :rfc:`2817`, Section 6 |
+------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`PRECONDITION_REQUIRED` | ``428`` | Additional HTTP Status Codes, |
+| | | :rfc:`6585`, Section 3 |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`TOO_MANY_REQUESTS` | ``429`` | Additional HTTP Status Codes, |
+| | | :rfc:`6585`, Section 4 |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`REQUEST_HEADER_FIELDS_TOO_LARGE` | ``431`` | Additional HTTP Status Codes, |
+| | | :rfc:`6585`, Section 5 |
++------------------------------------------+---------+-----------------------------------------------------------------------+
| :const:`INTERNAL_SERVER_ERROR` | ``500`` | HTTP/1.1, `RFC 2616, Section |
| | | 10.5.1 |
| | | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.1>`_ |
@@ -373,6 +382,12 @@ and also the following constants for integer status codes:
| :const:`NOT_EXTENDED` | ``510`` | An HTTP Extension Framework, |
| | | :rfc:`2774`, Section 7 |
+------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`NETWORK_AUTHENTICATION_REQUIRED` | ``511`` | Additional HTTP Status Codes, |
+| | | :rfc:`6585`, Section 6 |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+
+.. versionchanged:: 3.3
+ Added codes ``428``, ``429``, ``431`` and ``511`` from :rfc:`6585`.
.. data:: responses
@@ -506,6 +521,12 @@ statement.
Reads and returns the response body, or up to the next *amt* bytes.
+.. method:: HTTPResponse.readinto(b)
+
+ Reads up to the next ``len(b)`` bytes of the response body into the buffer *b*.
+ Returns the number of bytes read.
+
+ .. versionadded:: 3.3
.. method:: HTTPResponse.getheader(name, default=None)
diff --git a/Doc/library/http.cookiejar.rst b/Doc/library/http.cookiejar.rst
index cc8f251..40f24f2 100644
--- a/Doc/library/http.cookiejar.rst
+++ b/Doc/library/http.cookiejar.rst
@@ -40,7 +40,11 @@ The module defines the following exception:
.. exception:: LoadError
Instances of :class:`FileCookieJar` raise this exception on failure to load
- cookies from a file. :exc:`LoadError` is a subclass of :exc:`IOError`.
+ cookies from a file. :exc:`LoadError` is a subclass of :exc:`OSError`.
+
+ .. versionchanged:: 3.3
+ LoadError was made a subclass of :exc:`OSError` instead of
+ :exc:`IOError`.
The following classes are provided:
@@ -257,9 +261,12 @@ contained :class:`Cookie` objects.
Arguments are as for :meth:`save`.
The named file must be in the format understood by the class, or
- :exc:`LoadError` will be raised. Also, :exc:`IOError` may be raised, for
+ :exc:`LoadError` will be raised. Also, :exc:`OSError` may be raised, for
example if the file does not exist.
+ .. versionchanged:: 3.3
+ :exc:`IOError` used to be raised; it is now an alias of :exc:`OSError`.
+
.. method:: FileCookieJar.revert(filename=None, ignore_discard=False, ignore_expires=False)
diff --git a/Doc/library/http.cookies.rst b/Doc/library/http.cookies.rst
index 5ae3fd4..646f2e8 100644
--- a/Doc/library/http.cookies.rst
+++ b/Doc/library/http.cookies.rst
@@ -22,9 +22,12 @@ many current day browsers and servers have relaxed parsing rules when comes to
Cookie handling. As a result, the parsing rules used are a bit less strict.
The character set, :data:`string.ascii_letters`, :data:`string.digits` and
-``!#$%&'*+-.^_`|~`` denote the set of valid characters allowed by this module
+``!#$%&'*+-.^_`|~:`` denote the set of valid characters allowed by this module
in Cookie name (as :attr:`~Morsel.key`).
+.. versionchanged:: 3.3
+ Allowed ':' as a valid Cookie name character.
+
.. note::
diff --git a/Doc/library/http.rst b/Doc/library/http.rst
new file mode 100644
index 0000000..a387a37
--- /dev/null
+++ b/Doc/library/http.rst
@@ -0,0 +1,11 @@
+:mod:`http` --- HTTP modules
+============================
+
+``http`` is a package that collects several modules for working with the
+HyperText Transfer Protocol:
+
+* :mod:`http.client` is a low-level HTTP protocol client; for high-level URL
+ opening use :mod:`urllib.request`
+* :mod:`http.server` contains basic HTTP server classes based on :mod:`socketserver`
+* :mod:`http.cookies` has utilities for implementing state management with cookies
+* :mod:`http.cookiejar` provides persistence of cookies
diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst
index 300e332..cbad3ed 100644
--- a/Doc/library/http.server.rst
+++ b/Doc/library/http.server.rst
@@ -179,19 +179,30 @@ of which this module provides three different variants:
.. method:: send_response(code, message=None)
- Sends a response header and logs the accepted request. The HTTP response
- line is sent, followed by *Server* and *Date* headers. The values for
- these two headers are picked up from the :meth:`version_string` and
- :meth:`date_time_string` methods, respectively.
+ Adds a response header to the headers buffer and logs the accepted
+ request. The HTTP response line is written to the internal buffer,
+ followed by *Server* and *Date* headers. The values for these two headers
+ are picked up from the :meth:`version_string` and
+ :meth:`date_time_string` methods, respectively. If the server does not
+ intend to send any other headers using the :meth:`send_header` method,
+ then :meth:`send_response` should be followed by an :meth:`end_headers`
+ call.
+
+ .. versionchanged:: 3.3
+ Headers are stored to an internal buffer and :meth:`end_headers`
+ needs to be called explicitly.
+
.. method:: send_header(keyword, value)
- Stores the HTTP header to an internal buffer which will be written to the
- output stream when :meth:`end_headers` method is invoked.
- *keyword* should specify the header keyword, with *value*
- specifying its value.
+ Adds the HTTP header to an internal buffer which will be written to the
+ output stream when either :meth:`end_headers` or :meth:`flush_headers` is
+ invoked. *keyword* should specify the header keyword, with *value*
+ specifying its value. Note that, after the send_header calls are done,
+ :meth:`end_headers` MUST BE called in order to complete the operation.
- .. versionchanged:: 3.2 Storing the headers in an internal buffer
+ .. versionchanged:: 3.2
+ Headers are stored in an internal buffer.
.. method:: send_response_only(code, message=None)
@@ -205,10 +216,19 @@ of which this module provides three different variants:
.. method:: end_headers()
- Write the buffered HTTP headers to the output stream and send a blank
- line, indicating the end of the HTTP headers in the response.
+ Adds a blank line
+ (indicating the end of the HTTP headers in the response)
+ to the headers buffer and calls :meth:`flush_headers()`.
+
+ .. versionchanged:: 3.2
+ The buffered headers are written to the output stream.
+
+ .. method:: flush_headers()
+
+ Finally send the headers to the output stream and flush the internal
+ headers buffer.
- .. versionchanged:: 3.2 Writing the buffered headers to the output stream.
+ .. versionadded:: 3.3
.. method:: log_request(code='-', size='-')
@@ -250,8 +270,11 @@ of which this module provides three different variants:
.. method:: address_string()
- Returns the client address, formatted for logging. A name lookup is
- performed on the client's IP address.
+ Returns the client address.
+
+ .. versionchanged:: 3.3
+ Previously, a name lookup was performed. To avoid name resolution
+ delays, it now always returns the IP address.
.. class:: SimpleHTTPRequestHandler(request, client_address, server)
@@ -299,7 +322,7 @@ of which this module provides three different variants:
response if the :func:`listdir` fails.
If the request was mapped to a file, it is opened and the contents are
- returned. Any :exc:`IOError` exception in opening the requested file is
+ returned. Any :exc:`OSError` exception in opening the requested file is
mapped to a ``404``, ``'File not found'`` error. Otherwise, the content
type is guessed by calling the :meth:`guess_type` method, which in turn
uses the *extensions_map* variable.
@@ -378,3 +401,9 @@ the previous example, this serves files relative to the current directory. ::
Note that CGI scripts will be run with UID of user nobody, for security
reasons. Problems with the CGI script will be translated to error 403.
+
+:class:`CGIHTTPRequestHandler` can be enabled in the command line by passing
+the ``--cgi`` option::
+
+ python -m http.server --cgi 8000
+
diff --git a/Doc/library/imaplib.rst b/Doc/library/imaplib.rst
index 3f45c95..fefb284 100644
--- a/Doc/library/imaplib.rst
+++ b/Doc/library/imaplib.rst
@@ -64,14 +64,21 @@ Three exceptions are defined as attributes of the :class:`IMAP4` class:
There's also a subclass for secure connections:
-.. class:: IMAP4_SSL(host='', port=IMAP4_SSL_PORT, keyfile=None, certfile=None)
+.. class:: IMAP4_SSL(host='', port=IMAP4_SSL_PORT, keyfile=None, certfile=None, ssl_context=None)
This is a subclass derived from :class:`IMAP4` that connects over an SSL
encrypted socket (to use this class you need a socket module that was compiled
with SSL support). If *host* is not specified, ``''`` (the local host) is used.
If *port* is omitted, the standard IMAP4-over-SSL port (993) is used. *keyfile*
and *certfile* are also optional - they can contain a PEM formatted private key
- and certificate chain file for the SSL connection.
+ and certificate chain file for the SSL connection. *ssl_context* parameter is a
+ :class:`ssl.SSLContext` object which allows bundling SSL configuration
+ options, certificates and private keys into a single (potentially long-lived)
+ structure. Note that the *keyfile*/*certfile* parameters are mutually exclusive with *ssl_context*;
+ a :exc:`ValueError` is raised if *keyfile*/*certfile* is provided along with *ssl_context*.
+
+ .. versionchanged:: 3.3
+ *ssl_context* parameter added.
The second subclass allows for connections created by a child process:
@@ -106,13 +113,15 @@ The following utility functions are defined:
.. function:: Time2Internaldate(date_time)
- Convert *date_time* to an IMAP4 ``INTERNALDATE`` representation. The
- return value is a string in the form: ``"DD-Mmm-YYYY HH:MM:SS
- +HHMM"`` (including double-quotes). The *date_time* argument can be a
- number (int or float) representing seconds since epoch (as returned
- by :func:`time.time`), a 9-tuple representing local time (as returned by
- :func:`time.localtime`), or a double-quoted string. In the last case, it
- is assumed to already be in the correct format.
+ Convert *date_time* to an IMAP4 ``INTERNALDATE`` representation.
+ The return value is a string in the form: ``"DD-Mmm-YYYY HH:MM:SS
+ +HHMM"`` (including double-quotes). The *date_time* argument can
+ be a number (int or float) representing seconds since epoch (as
+ returned by :func:`time.time`), a 9-tuple representing local time,
+ an instance of :class:`time.struct_time` (as returned by
+ :func:`time.localtime`), an aware instance of
+ :class:`datetime.datetime`, or a double-quoted string. In the last
+ case, it is assumed to already be in the correct format.
Note that IMAP4 message numbers change as the mailbox changes; in particular,
after an ``EXPUNGE`` command performs deletions the remaining messages are
diff --git a/Doc/library/imp.rst b/Doc/library/imp.rst
index 1345b25..5cadda9 100644
--- a/Doc/library/imp.rst
+++ b/Doc/library/imp.rst
@@ -30,6 +30,9 @@ This module provides an interface to the mechanisms used to implement the
:const:`PY_SOURCE`, :const:`PY_COMPILED`, or :const:`C_EXTENSION`, described
below.
+ .. deprecated:: 3.3
+ Use the constants defined on :mod:`importlib.machinery` instead.
+
.. function:: find_module(name[, path])
@@ -69,6 +72,9 @@ This module provides an interface to the mechanisms used to implement the
then use :func:`find_module` with the *path* argument set to ``P.__path__``.
When *P* itself has a dotted name, apply this recipe recursively.
+ .. deprecated:: 3.3
+ Use :func:`importlib.find_loader` instead.
+
.. function:: load_module(name, file, pathname, description)
@@ -90,6 +96,10 @@ This module provides an interface to the mechanisms used to implement the
it was not ``None``, even when an exception is raised. This is best done
using a :keyword:`try` ... :keyword:`finally` statement.
+ .. deprecated:: 3.3
+ Unneeded as loaders should be used to load modules and
+ :func:`find_module` is deprecated.
+
.. function:: new_module(name)
@@ -97,37 +107,6 @@ This module provides an interface to the mechanisms used to implement the
in ``sys.modules``.
-.. function:: lock_held()
-
- Return ``True`` if the import lock is currently held, else ``False``. On
- platforms without threads, always return ``False``.
-
- On platforms with threads, a thread executing an import holds an internal lock
- until the import is complete. This lock blocks other threads from doing an
- import until the original import completes, which in turn prevents other threads
- from seeing incomplete module objects constructed by the original thread while
- in the process of completing its import (and the imports, if any, triggered by
- that).
-
-
-.. function:: acquire_lock()
-
- Acquire the interpreter's import lock for the current thread. This lock should
- be used by import hooks to ensure thread-safety when importing modules.
-
- Once a thread has acquired the import lock, the same thread may acquire it
- again without blocking; the thread must release it once for each time it has
- acquired it.
-
- On platforms without threads, this function does nothing.
-
-
-.. function:: release_lock()
-
- Release the interpreter's import lock. On platforms without threads, this
- function does nothing.
-
-
.. function:: reload(module)
Reload a previously imported *module*. The argument must be a module object, so
@@ -201,14 +180,19 @@ file paths.
source *path*. For example, if *path* is ``/foo/bar/baz.py`` the return
value would be ``/foo/bar/__pycache__/baz.cpython-32.pyc`` for Python 3.2.
The ``cpython-32`` string comes from the current magic tag (see
- :func:`get_tag`). The returned path will end in ``.pyc`` when
- ``__debug__`` is True or ``.pyo`` for an optimized Python
+ :func:`get_tag`; if :attr:`sys.implementation.cache_tag` is not defined then
+ :exc:`NotImplementedError` will be raised). The returned path will end in
+ ``.pyc`` when ``__debug__`` is True or ``.pyo`` for an optimized Python
(i.e. ``__debug__`` is False). By passing in True or False for
*debug_override* you can override the system's value for ``__debug__`` for
extension selection.
*path* need not exist.
+ .. versionchanged:: 3.3
+ If :attr:`sys.implementation.cache_tag` is ``None``, then
+ :exc:`NotImplementedError` is raised.
+
.. function:: source_from_cache(path)
@@ -216,7 +200,13 @@ file paths.
file path. For example, if *path* is
``/foo/bar/__pycache__/baz.cpython-32.pyc`` the returned path would be
``/foo/bar/baz.py``. *path* need not exist, however if it does not conform
- to :pep:`3147` format, a ``ValueError`` is raised.
+ to :pep:`3147` format, a ``ValueError`` is raised. If
+ :attr:`sys.implementation.cache_tag` is not defined,
+ :exc:`NotImplementedError` is raised.
+
+ .. versionchanged:: 3.3
+ Raise :exc:`NotImplementedError` when
+ :attr:`sys.implementation.cache_tag` is not defined.
.. function:: get_tag()
@@ -224,6 +214,64 @@ file paths.
Return the :pep:`3147` magic tag string matching this version of Python's
magic number, as returned by :func:`get_magic`.
+ .. note::
+ You may use :attr:`sys.implementation.cache_tag` directly starting
+ in Python 3.3.
+
+
+The following functions help interact with the import system's internal
+locking mechanism. Locking semantics of imports are an implementation
+detail which may vary from release to release. However, Python ensures
+that circular imports work without any deadlocks.
+
+
+.. function:: lock_held()
+
+ Return ``True`` if the global import lock is currently held, else
+ ``False``. On platforms without threads, always return ``False``.
+
+ On platforms with threads, a thread executing an import first holds a
+ global import lock, then sets up a per-module lock for the rest of the
+ import. This blocks other threads from importing the same module until
+ the original import completes, preventing other threads from seeing
+ incomplete module objects constructed by the original thread. An
+ exception is made for circular imports, which by construction have to
+ expose an incomplete module object at some point.
+
+.. versionchanged:: 3.3
+ The locking scheme has changed to per-module locks for
+ the most part. A global import lock is kept for some critical tasks,
+ such as initializing the per-module locks.
+
+
+.. function:: acquire_lock()
+
+ Acquire the interpreter's global import lock for the current thread.
+ This lock should be used by import hooks to ensure thread-safety when
+ importing modules.
+
+ Once a thread has acquired the import lock, the same thread may acquire it
+ again without blocking; the thread must release it once for each time it has
+ acquired it.
+
+ On platforms without threads, this function does nothing.
+
+.. versionchanged:: 3.3
+ The locking scheme has changed to per-module locks for
+ the most part. A global import lock is kept for some critical tasks,
+ such as initializing the per-module locks.
+
+
+.. function:: release_lock()
+
+ Release the interpreter's global import lock. On platforms without
+ threads, this function does nothing.
+
+.. versionchanged:: 3.3
+ The locking scheme has changed to per-module locks for
+ the most part. A global import lock is kept for some critical tasks,
+ such as initializing the per-module locks.
+
The following constants with integer values, defined in this module, are used
to indicate the search result of :func:`find_module`.
@@ -233,31 +281,43 @@ to indicate the search result of :func:`find_module`.
The module was found as a source file.
+ .. deprecated:: 3.3
+
.. data:: PY_COMPILED
The module was found as a compiled code object file.
+ .. deprecated:: 3.3
+
.. data:: C_EXTENSION
The module was found as dynamically loadable shared library.
+ .. deprecated:: 3.3
+
.. data:: PKG_DIRECTORY
The module was found as a package directory.
+ .. deprecated:: 3.3
+
.. data:: C_BUILTIN
The module was found as a built-in module.
+ .. deprecated:: 3.3
+
.. data:: PY_FROZEN
The module was found as a frozen module.
+ .. deprecated:: 3.3
+
.. class:: NullImporter(path_string)
diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst
index 1649063..04097f0 100644
--- a/Doc/library/importlib.rst
+++ b/Doc/library/importlib.rst
@@ -63,7 +63,7 @@ Details on custom importers can be found in :pep:`302`.
Functions
---------
-.. function:: __import__(name, globals={}, locals={}, fromlist=list(), level=0)
+.. function:: __import__(name, globals=None, locals=None, fromlist=(), level=0)
An implementation of the built-in :func:`__import__` function.
@@ -86,6 +86,30 @@ Functions
that was imported (e.g. ``pkg.mod``), while :func:`__import__` returns the
top-level package or module (e.g. ``pkg``).
+.. function:: find_loader(name, path=None)
+
+ Find the loader for a module, optionally within the specified *path*. If the
+ module is in :attr:`sys.modules`, then ``sys.modules[name].__loader__`` is
+ returned (unless the loader would be ``None``, in which case
+ :exc:`ValueError` is raised). Otherwise a search using :attr:`sys.meta_path`
+ is done. ``None`` is returned if no loader is found.
+
+ A dotted name does not have its parents implicitly imported. If that is
+ desired (although not necessarily required to find the loader, it will most
+ likely be needed if the loader actually is used to load the module), then
+ you will have to import the packages containing the module prior to calling
+ this function.
+
+.. function:: invalidate_caches()
+
+ Invalidate the internal caches of finders stored at
+ :data:`sys.meta_path`. If a finder implements ``invalidate_caches()`` then it
+ will be called to perform the invalidation. This function may be needed if
+ some modules are installed while your program is running and you expect the
+ program to notice the changes.
+
+ .. versionadded:: 3.3
+
:mod:`importlib.abc` -- Abstract base classes related to import
---------------------------------------------------------------
@@ -97,19 +121,90 @@ The :mod:`importlib.abc` module contains all of the core abstract base classes
used by :keyword:`import`. Some subclasses of the core abstract base classes
are also provided to help in implementing the core ABCs.
+ABC hierarchy::
+
+ object
+ +-- Finder (deprecated)
+ | +-- MetaPathFinder
+ | +-- PathEntryFinder
+ +-- Loader
+ +-- ResourceLoader --------+
+ +-- InspectLoader |
+ +-- ExecutionLoader --+
+ +-- FileLoader
+ +-- SourceLoader
+ +-- PyLoader (deprecated)
+ +-- PyPycLoader (deprecated)
+
.. class:: Finder
- An abstract base class representing a :term:`finder`.
- See :pep:`302` for the exact definition for a finder.
+ An abstract base class representing a :term:`finder`.
+
+ .. deprecated:: 3.3
+ Use :class:`MetaPathFinder` or :class:`PathEntryFinder` instead.
+
+ .. method:: find_module(fullname, path=None)
+
+ An abstract method for finding a :term:`loader` for the specified
+ module. Originally specified in :pep:`302`, this method was meant
+ for use in :data:`sys.meta_path` and in the path-based import subsystem.
+
+
+.. class:: MetaPathFinder
+
+ An abstract base class representing a :term:`meta path finder`. For
+ compatibility, this is a subclass of :class:`Finder`.
+
+ .. versionadded:: 3.3
+
+ .. method:: find_module(fullname, path)
+
+ An abstract method for finding a :term:`loader` for the specified
+ module. If this is a top-level import, *path* will be ``None``.
+ Otherwise, this is a search for a subpackage or module and *path*
+ will be the value of :attr:`__path__` from the parent
+ package. If a loader cannot be found, ``None`` is returned.
+
+ .. method:: invalidate_caches()
+
+ An optional method which, when called, should invalidate any internal
+ cache used by the finder. Used by :func:`importlib.invalidate_caches`
+ when invalidating the caches of all finders on :data:`sys.meta_path`.
+
- .. method:: find_module(fullname, path=None)
+.. class:: PathEntryFinder
- An abstract method for finding a :term:`loader` for the specified
- module. If the :term:`finder` is found on :data:`sys.meta_path` and the
- module to be searched for is a subpackage or module then *path* will
- be the value of :attr:`__path__` from the parent package. If a loader
- cannot be found, ``None`` is returned.
+ An abstract base class representing a :term:`path entry finder`. Though
+ it bears some similarities to :class:`MetaPathFinder`, ``PathEntryFinder``
+ is meant for use only within the path-based import subsystem provided
+ by :class:`PathFinder`. This ABC is a subclass of :class:`Finder` for
+ compatibility.
+
+ .. versionadded:: 3.3
+
+ .. method:: find_loader(fullname)
+
+ An abstract method for finding a :term:`loader` for the specified
+ module. Returns a 2-tuple of ``(loader, portion)`` where ``portion``
+ is a sequence of file system locations contributing to part of a namespace
+ package. The loader may be ``None`` while specifying ``portion`` to
+ signify the contribution of the file system locations to a namespace
+ package. An empty list can be used for ``portion`` to signify the loader
+ is not part of a package. If ``loader`` is ``None`` and ``portion`` is
+ the empty list then no loader or location for a namespace package were
+ found (i.e. failure to find anything for the module).
+
+ .. method:: find_module(fullname)
+
+ A concrete implementation of :meth:`Finder.find_module` which is
+ equivalent to ``self.find_loader(fullname)[0]``.
+
+ .. method:: invalidate_caches()
+
+ An optional method which, when called, should invalidate any internal
+ cache used by the finder. Used by :meth:`PathFinder.invalidate_caches`
+ when invalidating the caches of all cached finders.
.. class:: Loader
@@ -159,6 +254,13 @@ are also provided to help in implementing the core ABCs.
(This is not set by the built-in import machinery,
but it should be set whenever a :term:`loader` is used.)
+ .. method:: module_repr(module)
+
+ An abstract method which when implemented calculates and returns the
+ given module's repr, as a string.
+
+ .. versionadded:: 3.3
+
.. class:: ResourceLoader
@@ -224,6 +326,38 @@ are also provided to help in implementing the core ABCs.
module.
+.. class:: FileLoader(fullname, path)
+
+ An abstract base class which inherits from :class:`ResourceLoader` and
+ :class:`ExecutionLoader`, providing concrete implementations of
+ :meth:`ResourceLoader.get_data` and :meth:`ExecutionLoader.get_filename`.
+
+ The *fullname* argument is a fully resolved name of the module the loader is
+ to handle. The *path* argument is the path to the file for the module.
+
+ .. versionadded:: 3.3
+
+ .. attribute:: name
+
+ The name of the module the loader can handle.
+
+ .. attribute:: path
+
+ Path to the file of the module.
+
+ .. method:: load_module(fullname)
+
+ Calls super's ``load_module()``.
+
+ .. method:: get_filename(fullname)
+
+ Returns :attr:`path`.
+
+ .. method:: get_data(path)
+
+ Reads *path* as a binary file and returns the bytes from it.
+
+
.. class:: SourceLoader
An abstract base class for implementing source (and optionally bytecode)
@@ -243,12 +377,31 @@ are also provided to help in implementing the core ABCs.
optimization to speed up loading by removing the parsing step of Python's
compiler, and so no bytecode-specific API is exposed.
- .. method:: path_mtime(self, path)
+ .. method:: path_stats(path)
+
+ Optional abstract method which returns a :class:`dict` containing
+ metadata about the specified path. Supported dictionary keys are:
+
+ - ``'mtime'`` (mandatory): an integer or floating-point number
+ representing the modification time of the source code;
+ - ``'size'`` (optional): the size in bytes of the source code.
+
+ Any other keys in the dictionary are ignored, to allow for future
+ extensions.
+
+ .. versionadded:: 3.3
+
+ .. method:: path_mtime(path)
Optional abstract method which returns the modification time for the
specified path.
- .. method:: set_data(self, path, data)
+ .. deprecated:: 3.3
+ This method is deprecated in favour of :meth:`path_stats`. You don't
+ have to implement it, but it is still available for compatibility
+ purposes.
+
+ .. method:: set_data(path, data)
Optional abstract method which writes the specified bytes to a file
path. Any intermediate directories which do not exist are to be created
@@ -257,23 +410,25 @@ are also provided to help in implementing the core ABCs.
When writing to the path fails because the path is read-only
(:attr:`errno.EACCES`), do not propagate the exception.
- .. method:: get_code(self, fullname)
+ .. method:: get_code(fullname)
Concrete implementation of :meth:`InspectLoader.get_code`.
- .. method:: load_module(self, fullname)
+ .. method:: load_module(fullname)
Concrete implementation of :meth:`Loader.load_module`.
- .. method:: get_source(self, fullname)
+ .. method:: get_source(fullname)
Concrete implementation of :meth:`InspectLoader.get_source`.
- .. method:: is_package(self, fullname)
+ .. method:: is_package(fullname)
Concrete implementation of :meth:`InspectLoader.is_package`. A module
- is determined to be a package if its file path is a file named
- ``__init__`` when the file extension is removed.
+ is determined to be a package if its file path (as provided by
+ :meth:`ExecutionLoader.get_filename`) is a file named
+ ``__init__`` when the file extension is removed **and** the module name
+ itself does not end in ``__init__``.
.. class:: PyLoader
@@ -374,6 +529,10 @@ are also provided to help in implementing the core ABCs.
:class:`PyLoader`. Do note that this solution will not support
sourceless/bytecode-only loading; only source *and* bytecode loading.
+ .. versionchanged:: 3.3
+ Updated to parse (but not use) the new source size field in bytecode
+ files when reading and to write out the field properly when writing.
+
.. method:: source_mtime(fullname)
An abstract method which returns the modification time for the source
@@ -417,12 +576,59 @@ are also provided to help in implementing the core ABCs.
This module contains the various objects that help :keyword:`import`
find and load modules.
+.. attribute:: SOURCE_SUFFIXES
+
+ A list of strings representing the recognized file suffixes for source
+ modules.
+
+ .. versionadded:: 3.3
+
+.. attribute:: DEBUG_BYTECODE_SUFFIXES
+
+ A list of strings representing the file suffixes for non-optimized bytecode
+ modules.
+
+ .. versionadded:: 3.3
+
+.. attribute:: OPTIMIZED_BYTECODE_SUFFIXES
+
+ A list of strings representing the file suffixes for optimized bytecode
+ modules.
+
+ .. versionadded:: 3.3
+
+.. attribute:: BYTECODE_SUFFIXES
+
+ A list of strings representing the recognized file suffixes for bytecode
+ modules. Set to either :attr:`DEBUG_BYTECODE_SUFFIXES` or
+ :attr:`OPTIMIZED_BYTECODE_SUFFIXES` based on whether ``__debug__`` is true.
+
+ .. versionadded:: 3.3
+
+.. attribute:: EXTENSION_SUFFIXES
+
+ A list of strings representing the recognized file suffixes for
+ extension modules.
+
+ .. versionadded:: 3.3
+
+.. function:: all_suffixes()
+
+ Returns a combined list of strings representing all file suffixes for
+ modules recognized by the standard import machinery. This is a
+ helper for code which simply needs to know if a filesystem path
+ potentially refers to a module without needing any details on the kind
+ of module (for example, :func:`inspect.getmodulename`).
+
+ .. versionadded:: 3.3
+
+
.. class:: BuiltinImporter
An :term:`importer` for built-in modules. All known built-in modules are
listed in :data:`sys.builtin_module_names`. This class implements the
- :class:`importlib.abc.Finder` and :class:`importlib.abc.InspectLoader`
- ABCs.
+ :class:`importlib.abc.MetaPathFinder` and
+ :class:`importlib.abc.InspectLoader` ABCs.
Only class methods are defined by this class to alleviate the need for
instantiation.
@@ -431,48 +637,223 @@ find and load modules.
.. class:: FrozenImporter
An :term:`importer` for frozen modules. This class implements the
- :class:`importlib.abc.Finder` and :class:`importlib.abc.InspectLoader`
- ABCs.
+ :class:`importlib.abc.MetaPathFinder` and
+ :class:`importlib.abc.InspectLoader` ABCs.
Only class methods are defined by this class to alleviate the need for
instantiation.
+.. class:: WindowsRegistryFinder
+
+ :term:`Finder` for modules declared in the Windows registry. This class
+ implements the :class:`importlib.abc.Finder` ABC.
+
+ Only class methods are defined by this class to alleviate the need for
+ instantiation.
+
+ .. versionadded:: 3.3
+
+
.. class:: PathFinder
- :term:`Finder` for :data:`sys.path`. This class implements the
- :class:`importlib.abc.Finder` ABC.
+ A :term:`Finder` for :data:`sys.path` and package ``__path__`` attributes.
+ This class implements the :class:`importlib.abc.MetaPathFinder` ABC.
- This class does not perfectly mirror the semantics of :keyword:`import` in
- terms of :data:`sys.path`. No implicit path hooks are assumed for
- simplification of the class and its semantics.
+ Only class methods are defined by this class to alleviate the need for
+ instantiation.
- Only class methods are defined by this class to alleviate the need for
- instantiation.
+ .. classmethod:: find_module(fullname, path=None)
+
+ Class method that attempts to find a :term:`loader` for the module
+ specified by *fullname* on :data:`sys.path` or, if defined, on
+ *path*. For each path entry that is searched,
+ :data:`sys.path_importer_cache` is checked. If a non-false object is
+ found then it is used as the :term:`finder` to look for the module
+ being searched for. If no entry is found in
+ :data:`sys.path_importer_cache`, then :data:`sys.path_hooks` is
+ searched for a finder for the path entry and, if found, is stored in
+ :data:`sys.path_importer_cache` along with being queried about the
+ module. If no finder is ever found then ``None`` is both stored in
+ the cache and returned.
+
+ .. classmethod:: invalidate_caches()
+
+ Calls :meth:`importlib.abc.PathEntryFinder.invalidate_caches` on all
+ finders stored in :attr:`sys.path_importer_cache`.
+
+
+.. class:: FileFinder(path, \*loader_details)
+
+ A concrete implementation of :class:`importlib.abc.PathEntryFinder` which
+ caches results from the file system.
+
+ The *path* argument is the directory for which the finder is in charge of
+ searching.
+
+ The *loader_details* argument is a variable number of 2-item tuples each
+ containing a loader and a sequence of file suffixes the loader recognizes.
+
+ The finder will cache the directory contents as necessary, making stat calls
+ for each module search to verify the cache is not outdated. Because cache
+ staleness relies upon the granularity of the operating system's state
+ information of the file system, there is a potential race condition of
+ searching for a module, creating a new file, and then searching for the
+ module the new file represents. If the operations happen fast enough to fit
+ within the granularity of stat calls, then the module search will fail. To
+ prevent this from happening, when you create a module dynamically, make sure
+ to call :func:`importlib.invalidate_caches`.
+
+ .. versionadded:: 3.3
+
+ .. attribute:: path
+
+ The path the finder will search in.
+
+ .. method:: find_module(fullname)
+
+ Attempt to find the loader to handle *fullname* within :attr:`path`.
+
+ .. method:: invalidate_caches()
+
+ Clear out the internal cache.
+
+ .. classmethod:: path_hook(\*loader_details)
+
+ A class method which returns a closure for use on :attr:`sys.path_hooks`.
+ An instance of :class:`FileFinder` is returned by the closure using the
+ path argument given to the closure directly and *loader_details*
+ indirectly.
+
+ If the argument to the closure is not an existing directory,
+ :exc:`ImportError` is raised.
+
+
+.. class:: SourceFileLoader(fullname, path)
+
+ A concrete implementation of :class:`importlib.abc.SourceLoader` by
+ subclassing :class:`importlib.abc.FileLoader` and providing some concrete
+ implementations of other methods.
+
+ .. versionadded:: 3.3
+
+ .. attribute:: name
+
+ The name of the module that this loader will handle.
+
+ .. attribute:: path
+
+ The path to the source file.
+
+ .. method:: is_package(fullname)
+
+ Return true if :attr:`path` appears to be for a package.
+
+ .. method:: path_stats(path)
+
+ Concrete implementation of :meth:`importlib.abc.SourceLoader.path_stats`.
+
+ .. method:: set_data(path, data)
+
+ Concrete implementation of :meth:`importlib.abc.SourceLoader.set_data`.
+
+
+.. class:: SourcelessFileLoader(fullname, path)
- .. classmethod:: find_module(fullname, path=None)
+ A concrete implementation of :class:`importlib.abc.FileLoader` which can
+ import bytecode files (i.e. no source code files exist).
- Class method that attempts to find a :term:`loader` for the module
- specified by *fullname* on :data:`sys.path` or, if defined, on
- *path*. For each path entry that is searched,
- :data:`sys.path_importer_cache` is checked. If an non-false object is
- found then it is used as the :term:`finder` to look for the module
- being searched for. If no entry is found in
- :data:`sys.path_importer_cache`, then :data:`sys.path_hooks` is
- searched for a finder for the path entry and, if found, is stored in
- :data:`sys.path_importer_cache` along with being queried about the
- module. If no finder is ever found then ``None`` is returned.
+ Please note that direct use of bytecode files (and thus not source code
+ files) inhibits your modules from being usable by all Python
+ implementations or new versions of Python which change the bytecode
+ format.
+
+ .. versionadded:: 3.3
+
+ .. attribute:: name
+
+ The name of the module the loader will handle.
+
+ .. attribute:: path
+
+ The path to the bytecode file.
+
+ .. method:: is_package(fullname)
+
+ Determines if the module is a package based on :attr:`path`.
+
+ .. method:: get_code(fullname)
+
+ Returns the code object for :attr:`name` created from :attr:`path`.
+
+ .. method:: get_source(fullname)
+
+ Returns ``None`` as bytecode files have no source when this loader is
+ used.
+
+
+.. class:: ExtensionFileLoader(fullname, path)
+
+ A concrete implementation of :class:`importlib.abc.InspectLoader` for
+ extension modules.
+
+ The *fullname* argument specifies the name of the module the loader is to
+ support. The *path* argument is the path to the extension module's file.
+
+ .. versionadded:: 3.3
+
+ .. attribute:: name
+
+ Name of the module the loader supports.
+
+ .. attribute:: path
+
+ Path to the extension module.
+
+ .. method:: load_module(fullname)
+
+ Loads the extension module if and only if *fullname* is the same as
+ :attr:`name` or is ``None``.
+
+ .. method:: is_package(fullname)
+
+ Returns ``True`` if the file path points to a package's ``__init__``
+ module based on :attr:`EXTENSION_SUFFIXES`.
+
+ .. method:: get_code(fullname)
+
+ Returns ``None`` as extension modules lack a code object.
+
+ .. method:: get_source(fullname)
+
+ Returns ``None`` as extension modules do not have source code.
:mod:`importlib.util` -- Utility code for importers
---------------------------------------------------
.. module:: importlib.util
- :synopsis: Importers and path hooks
+ :synopsis: Utility code for importers
This module contains the various objects that help in the construction of
an :term:`importer`.
+.. function:: resolve_name(name, package)
+
+ Resolve a relative module name to an absolute one.
+
+ If **name** has no leading dots, then **name** is simply returned. This
+ allows for usage such as
+ ``importlib.util.resolve_name('sys', __package__)`` without doing a
+ check to see if the **package** argument is needed.
+
+ :exc:`ValueError` is raised if **name** is a relative module name but
+ **package** is a false value (e.g. ``None`` or the empty string).
+ :exc:`ValueError` is also raised if a relative name would escape its containing
+ package (e.g. requesting ``..bacon`` from within the ``spam`` package).
+
+ .. versionadded:: 3.3
+
.. decorator:: module_for_loader
A :term:`decorator` for a :term:`loader` method,
@@ -481,22 +862,30 @@ an :term:`importer`.
signature taking two positional arguments
(e.g. ``load_module(self, module)``) for which the second argument
will be the module **object** to be used by the loader.
- Note that the decorator
- will not work on static methods because of the assumption of two
- arguments.
+ Note that the decorator will not work on static methods because of the
+ assumption of two arguments.
The decorated method will take in the **name** of the module to be loaded
as expected for a :term:`loader`. If the module is not found in
:data:`sys.modules` then a new one is constructed with its
- :attr:`__name__` attribute set. Otherwise the module found in
- :data:`sys.modules` will be passed into the method. If an
- exception is raised by the decorated method and a module was added to
+ :attr:`__name__` attribute set to **name**, :attr:`__loader__` set to
+ **self**, and :attr:`__package__` set if
+ :meth:`importlib.abc.InspectLoader.is_package` is defined for **self** and
+ does not raise :exc:`ImportError` for **name**. If a new module is not
+ needed then the module found in :data:`sys.modules` will be passed into the
+ method.
+
+ If an exception is raised by the decorated method and a module was added to
:data:`sys.modules` it will be removed to prevent a partially initialized
 module from being left in :data:`sys.modules`. If the module was already
in :data:`sys.modules` then it is left alone.
Use of this decorator handles all the details of which module object a
- loader should initialize as specified by :pep:`302`.
+ loader should initialize as specified by :pep:`302` as best as possible.
+
+ .. versionchanged:: 3.3
+ :attr:`__loader__` and :attr:`__package__` are automatically set
+ (when possible).
.. decorator:: set_loader
@@ -504,7 +893,13 @@ an :term:`importer`.
to set the :attr:`__loader__`
attribute on loaded modules. If the attribute is already set the decorator
does nothing. It is assumed that the first positional argument to the
- wrapped method is what :attr:`__loader__` should be set to.
+ wrapped method (i.e. ``self``) is what :attr:`__loader__` should be set to.
+
+ .. note::
+
+ It is recommended that :func:`module_for_loader` be used over this
+ decorator as it subsumes this functionality.
+
.. decorator:: set_package
@@ -515,8 +910,12 @@ an :term:`importer`.
set on and not the module found in :data:`sys.modules`.
Reliance on this decorator is discouraged when it is possible to set
- :attr:`__package__` before the execution of the code is possible. By
- setting it before the code for the module is executed it allows the
- attribute to be used at the global level of the module during
+ :attr:`__package__` before importing. By
+ setting it beforehand the code for the module is executed with the
+ attribute set and thus can be used by global level code during
initialization.
+ .. note::
+
+ It is recommended that :func:`module_for_loader` be used over this
+ decorator as it subsumes this functionality.
diff --git a/Doc/library/index.rst b/Doc/library/index.rst
index 9ac688c..ba20361 100644
--- a/Doc/library/index.rst
+++ b/Doc/library/index.rst
@@ -46,7 +46,8 @@ the `Python Package Index <http://pypi.python.org/pypi>`_.
stdtypes.rst
exceptions.rst
- strings.rst
+ text.rst
+ binary.rst
datatypes.rst
numeric.rst
functional.rst
@@ -56,7 +57,7 @@ the `Python Package Index <http://pypi.python.org/pypi>`_.
fileformats.rst
crypto.rst
allos.rst
- someos.rst
+ concurrency.rst
ipc.rst
netdata.rst
markup.rst
diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst
index d127ce8..4050ab3 100644
--- a/Doc/library/inspect.rst
+++ b/Doc/library/inspect.rst
@@ -190,13 +190,26 @@ attributes:
compared to the constants defined in the :mod:`imp` module; see the
documentation for that module for more information on module types.
+ .. deprecated:: 3.3
+ You may check the file path's suffix against the supported suffixes
+ listed in :mod:`importlib.machinery` to infer the same information.
+
.. function:: getmodulename(path)
Return the name of the module named by the file *path*, without including the
- names of enclosing packages. This uses the same algorithm as the interpreter
- uses when searching for modules. If the name cannot be matched according to the
- interpreter's rules, ``None`` is returned.
+ names of enclosing packages. The file extension is checked against all of
+ the entries in :func:`importlib.machinery.all_suffixes`. If it matches,
+ the final path component is returned with the extension removed.
+ Otherwise, ``None`` is returned.
+
+ Note that this function *only* returns a meaningful name for actual
+ Python modules - paths that potentially refer to Python packages will
+ still return ``None``.
+
+ .. versionchanged:: 3.3
+ This function is now based directly on :mod:`importlib` rather than the
+ deprecated :func:`getmoduleinfo`.
.. function:: ismodule(object)
@@ -355,17 +368,25 @@ Retrieving source code
argument may be a module, class, method, function, traceback, frame, or code
object. The source code is returned as a list of the lines corresponding to the
object and the line number indicates where in the original source file the first
- line of code was found. An :exc:`IOError` is raised if the source code cannot
+ line of code was found. An :exc:`OSError` is raised if the source code cannot
be retrieved.
+ .. versionchanged:: 3.3
+ :exc:`OSError` is raised instead of :exc:`IOError`, now an alias of the
+ former.
+
.. function:: getsource(object)
Return the text of the source code for an object. The argument may be a module,
class, method, function, traceback, frame, or code object. The source code is
- returned as a single string. An :exc:`IOError` is raised if the source code
+ returned as a single string. An :exc:`OSError` is raised if the source code
cannot be retrieved.
+ .. versionchanged:: 3.3
+ :exc:`OSError` is raised instead of :exc:`IOError`, now an alias of the
+ former.
+
.. function:: cleandoc(doc)
@@ -374,6 +395,264 @@ Retrieving source code
onwards is removed. Also, all tabs are expanded to spaces.
+.. _inspect-signature-object:
+
+Introspecting callables with the Signature object
+-------------------------------------------------
+
+.. versionadded:: 3.3
+
+The Signature object represents the call signature of a callable object and its
+return annotation. To retrieve a Signature object, use the :func:`signature`
+function.
+
+.. function:: signature(callable)
+
+ Return a :class:`Signature` object for the given ``callable``::
+
+ >>> from inspect import signature
+ >>> def foo(a, *, b:int, **kwargs):
+ ... pass
+
+ >>> sig = signature(foo)
+
+ >>> str(sig)
+ '(a, *, b:int, **kwargs)'
+
+ >>> str(sig.parameters['b'])
+ 'b:int'
+
+ >>> sig.parameters['b'].annotation
+ <class 'int'>
+
+ Accepts a wide range of Python callables, from plain functions and classes to
+ :func:`functools.partial` objects.
+
+ .. note::
+
+ Some callables may not be introspectable in certain implementations of
+ Python. For example, in CPython, built-in functions defined in C provide
+ no metadata about their arguments.
+
+
+.. class:: Signature
+
+ A Signature object represents the call signature of a function and its return
+ annotation. For each parameter accepted by the function it stores a
+ :class:`Parameter` object in its :attr:`parameters` collection.
+
+ Signature objects are *immutable*. Use :meth:`Signature.replace` to make a
+ modified copy.
+
+ .. attribute:: Signature.empty
+
+ A special class-level marker to specify absence of a return annotation.
+
+ .. attribute:: Signature.parameters
+
+ An ordered mapping of parameters' names to the corresponding
+ :class:`Parameter` objects.
+
+ .. attribute:: Signature.return_annotation
+
+ The "return" annotation for the callable. If the callable has no "return"
+ annotation, this attribute is set to :attr:`Signature.empty`.
+
+ .. method:: Signature.bind(*args, **kwargs)
+
+ Create a mapping from positional and keyword arguments to parameters.
+ Returns :class:`BoundArguments` if ``*args`` and ``**kwargs`` match the
+ signature, or raises a :exc:`TypeError`.
+
+ .. method:: Signature.bind_partial(*args, **kwargs)
+
+ Works the same way as :meth:`Signature.bind`, but allows the omission of
+ some required arguments (mimics :func:`functools.partial` behavior.)
+ Returns :class:`BoundArguments`, or raises a :exc:`TypeError` if the
+ passed arguments do not match the signature.
+
+ .. method:: Signature.replace(*[, parameters][, return_annotation])
+
+ Create a new Signature instance based on the instance replace was invoked
+ on. It is possible to pass different ``parameters`` and/or
+ ``return_annotation`` to override the corresponding properties of the base
+ signature. To remove return_annotation from the copied Signature, pass in
+ :attr:`Signature.empty`.
+
+ ::
+
+ >>> def test(a, b):
+ ... pass
+ >>> sig = signature(test)
+ >>> new_sig = sig.replace(return_annotation="new return anno")
+ >>> str(new_sig)
+ "(a, b) -> 'new return anno'"
+
+
+.. class:: Parameter
+
+ Parameter objects are *immutable*. Instead of modifying a Parameter object,
+ you can use :meth:`Parameter.replace` to create a modified copy.
+
+ .. attribute:: Parameter.empty
+
+ A special class-level marker to specify absence of default values and
+ annotations.
+
+ .. attribute:: Parameter.name
+
+ The name of the parameter as a string. Must be a valid Python identifier
+ name (with the exception of ``POSITIONAL_ONLY`` parameters, which can have
+ it set to ``None``).
+
+ .. attribute:: Parameter.default
+
+ The default value for the parameter. If the parameter has no default
+ value, this attribute is set to :attr:`Parameter.empty`.
+
+ .. attribute:: Parameter.annotation
+
+ The annotation for the parameter. If the parameter has no annotation,
+ this attribute is set to :attr:`Parameter.empty`.
+
+ .. attribute:: Parameter.kind
+
+ Describes how argument values are bound to the parameter. Possible values
+ (accessible via :class:`Parameter`, like ``Parameter.KEYWORD_ONLY``):
+
+ +------------------------+----------------------------------------------+
+ | Name | Meaning |
+ +========================+==============================================+
+ | *POSITIONAL_ONLY* | Value must be supplied as a positional |
+ | | argument. |
+ | | |
+ | | Python has no explicit syntax for defining |
+ | | positional-only parameters, but many built-in|
+ | | and extension module functions (especially |
+ | | those that accept only one or two parameters)|
+ | | accept them. |
+ +------------------------+----------------------------------------------+
+ | *POSITIONAL_OR_KEYWORD*| Value may be supplied as either a keyword or |
+ | | positional argument (this is the standard |
+ | | binding behaviour for functions implemented |
+ | | in Python.) |
+ +------------------------+----------------------------------------------+
+ | *VAR_POSITIONAL* | A tuple of positional arguments that aren't |
+ | | bound to any other parameter. This |
+ | | corresponds to a ``*args`` parameter in a |
+ | | Python function definition. |
+ +------------------------+----------------------------------------------+
+ | *KEYWORD_ONLY* | Value must be supplied as a keyword argument.|
+ | | Keyword only parameters are those which |
+ | | appear after a ``*`` or ``*args`` entry in a |
+ | | Python function definition. |
+ +------------------------+----------------------------------------------+
+ | *VAR_KEYWORD* | A dict of keyword arguments that aren't bound|
+ | | to any other parameter. This corresponds to a|
+ | | ``**kwargs`` parameter in a Python function |
+ | | definition. |
+ +------------------------+----------------------------------------------+
+
+ Example: print all keyword-only arguments without default values::
+
+ >>> def foo(a, b, *, c, d=10):
+ ... pass
+
+ >>> sig = signature(foo)
+ >>> for param in sig.parameters.values():
+ ... if (param.kind == param.KEYWORD_ONLY and
+ ... param.default is param.empty):
+ ... print('Parameter:', param)
+ Parameter: c
+
+ .. method:: Parameter.replace(*[, name][, kind][, default][, annotation])
+
+ Create a new Parameter instance based on the instance replace was invoked
+ on. To override a :class:`Parameter` attribute, pass the corresponding
+ argument. To remove a default value and/or an annotation from a
+ Parameter, pass :attr:`Parameter.empty`.
+
+ ::
+
+ >>> from inspect import Parameter
+ >>> param = Parameter('foo', Parameter.KEYWORD_ONLY, default=42)
+ >>> str(param)
+ 'foo=42'
+
+ >>> str(param.replace()) # Will create a shallow copy of 'param'
+ 'foo=42'
+
+ >>> str(param.replace(default=Parameter.empty, annotation='spam'))
+ "foo:'spam'"
+
+
+.. class:: BoundArguments
+
+ Result of a :meth:`Signature.bind` or :meth:`Signature.bind_partial` call.
+ Holds the mapping of arguments to the function's parameters.
+
+ .. attribute:: BoundArguments.arguments
+
+ An ordered, mutable mapping (:class:`collections.OrderedDict`) of
+ parameters' names to arguments' values. Contains only explicitly bound
+ arguments. Changes in :attr:`arguments` will reflect in :attr:`args` and
+ :attr:`kwargs`.
+
+ Should be used in conjunction with :attr:`Signature.parameters` for any
+ argument processing purposes.
+
+ .. note::
+
+ Arguments for which :meth:`Signature.bind` or
+ :meth:`Signature.bind_partial` relied on a default value are skipped.
+ However, if needed, it is easy to include them.
+
+ ::
+
+ >>> def foo(a, b=10):
+ ... pass
+
+ >>> sig = signature(foo)
+ >>> ba = sig.bind(5)
+
+ >>> ba.args, ba.kwargs
+ ((5,), {})
+
+ >>> for param in sig.parameters.values():
+ ... if param.name not in ba.arguments:
+ ... ba.arguments[param.name] = param.default
+
+ >>> ba.args, ba.kwargs
+ ((5, 10), {})
+
+
+ .. attribute:: BoundArguments.args
+
+ A tuple of positional arguments values. Dynamically computed from the
+ :attr:`arguments` attribute.
+
+ .. attribute:: BoundArguments.kwargs
+
+ A dict of keyword arguments values. Dynamically computed from the
+ :attr:`arguments` attribute.
+
+ The :attr:`args` and :attr:`kwargs` properties can be used to invoke
+ functions::
+
+ def test(a, *, b):
+ ...
+
+ sig = signature(test)
+ ba = sig.bind(10, b=20)
+ test(*ba.args, **ba.kwargs)
+
+
+.. seealso::
+
+ :pep:`362` - Function Signature Object.
+ The detailed specification, implementation details and examples.
+
+
.. _inspect-classes-functions:
Classes and functions
@@ -396,9 +675,9 @@ Classes and functions
:term:`named tuple` ``ArgSpec(args, varargs, keywords, defaults)`` is
returned. *args* is a list of the argument names. *varargs* and *keywords*
are the names of the ``*`` and ``**`` arguments or ``None``. *defaults* is a
- tuple of default argument values or None if there are no default arguments;
- if this tuple has *n* elements, they correspond to the last *n* elements
- listed in *args*.
+ tuple of default argument values or ``None`` if there are no default
+ arguments; if this tuple has *n* elements, they correspond to the last
+ *n* elements listed in *args*.
.. deprecated:: 3.0
Use :func:`getfullargspec` instead, which provides information about
@@ -414,14 +693,19 @@ Classes and functions
annotations)``
*args* is a list of the argument names. *varargs* and *varkw* are the names
- of the ``*`` and ``**`` arguments or ``None``. *defaults* is an n-tuple of
- the default values of the last n arguments. *kwonlyargs* is a list of
+ of the ``*`` and ``**`` arguments or ``None``. *defaults* is an *n*-tuple
+ of the default values of the last *n* arguments, or ``None`` if there are no
+ default arguments. *kwonlyargs* is a list of
keyword-only argument names. *kwonlydefaults* is a dictionary mapping names
from kwonlyargs to defaults. *annotations* is a dictionary mapping argument
names to annotations.
The first four items in the tuple correspond to :func:`getargspec`.
+ .. note::
+ Consider using the new :ref:`Signature Object <inspect-signature-object>`
+ interface, which provides a better way of introspecting functions.
+
.. function:: getargvalues(frame)
@@ -432,11 +716,16 @@ Classes and functions
locals dictionary of the given frame.
-.. function:: formatargspec(args[, varargs, varkw, defaults, formatarg, formatvarargs, formatvarkw, formatvalue])
+.. function:: formatargspec(args[, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, annotations, formatarg, formatvarargs, formatvarkw, formatvalue, formatreturns, formatannotations])
- Format a pretty argument spec from the four values returned by
- :func:`getargspec`. The format\* arguments are the corresponding optional
- formatting functions that are called to turn names and values into strings.
+ Format a pretty argument spec from the values returned by
+ :func:`getargspec` or :func:`getfullargspec`.
+
+ The first seven arguments are (``args``, ``varargs``, ``varkw``,
+ ``defaults``, ``kwonlyargs``, ``kwonlydefaults``, ``annotations``). The
+ other five arguments are the corresponding optional formatting functions
+ that are called to turn names and values into strings. The last argument
+ is an optional function to format the sequence of arguments.
.. function:: formatargvalues(args[, varargs, varkw, locals, formatarg, formatvarargs, formatvarkw, formatvalue])
@@ -468,17 +757,36 @@ Classes and functions
>>> from inspect import getcallargs
>>> def f(a, b=1, *pos, **named):
... pass
- >>> getcallargs(f, 1, 2, 3)
- {'a': 1, 'named': {}, 'b': 2, 'pos': (3,)}
- >>> getcallargs(f, a=2, x=4)
- {'a': 2, 'named': {'x': 4}, 'b': 1, 'pos': ()}
+ >>> getcallargs(f, 1, 2, 3) == {'a': 1, 'named': {}, 'b': 2, 'pos': (3,)}
+ True
+ >>> getcallargs(f, a=2, x=4) == {'a': 2, 'named': {'x': 4}, 'b': 1, 'pos': ()}
+ True
>>> getcallargs(f)
Traceback (most recent call last):
...
- TypeError: f() takes at least 1 argument (0 given)
+ TypeError: f() missing 1 required positional argument: 'a'
.. versionadded:: 3.2
+ .. note::
+ Consider using the new :meth:`Signature.bind` instead.
+
+
+.. function:: getclosurevars(func)
+
+ Get the mapping of external name references in a Python function or
+ method *func* to their current values. A
+ :term:`named tuple` ``ClosureVars(nonlocals, globals, builtins, unbound)``
+ is returned. *nonlocals* maps referenced names to lexical closure
+ variables, *globals* to the function's module globals and *builtins* to
+ the builtins visible from the function body. *unbound* is the set of names
+ referenced in the function that could not be resolved at all given the
+ current module globals and builtins.
+
+ :exc:`TypeError` is raised if *func* is not a Python function or method.
+
+ .. versionadded:: 3.3
+
.. _inspect-stack:
@@ -643,3 +951,27 @@ generator to be determined easily.
* GEN_CLOSED: Execution has completed.
.. versionadded:: 3.2
+
+The current internal state of the generator can also be queried. This is
+mostly useful for testing purposes, to ensure that internal state is being
+updated as expected:
+
+.. function:: getgeneratorlocals(generator)
+
+ Get the mapping of live local variables in *generator* to their current
+ values. A dictionary is returned that maps from variable names to values.
+ This is the equivalent of calling :func:`locals` in the body of the
+ generator, and all the same caveats apply.
+
+ If *generator* is a :term:`generator` with no currently associated frame,
+ then an empty dictionary is returned. :exc:`TypeError` is raised if
+ *generator* is not a Python generator object.
+
+ .. impl-detail::
+
+ This function relies on the generator exposing a Python stack frame
+ for introspection, which isn't guaranteed to be the case in all
+ implementations of Python. In such cases, this function will always
+ return an empty dictionary.
+
+ .. versionadded:: 3.3
diff --git a/Doc/library/internet.rst b/Doc/library/internet.rst
index 6fa7873..b8950bb 100644
--- a/Doc/library/internet.rst
+++ b/Doc/library/internet.rst
@@ -23,10 +23,12 @@ is currently supported on most popular platforms. Here is an overview:
cgi.rst
cgitb.rst
wsgiref.rst
+ urllib.rst
urllib.request.rst
urllib.parse.rst
urllib.error.rst
urllib.robotparser.rst
+ http.rst
http.client.rst
ftplib.rst
poplib.rst
@@ -40,5 +42,7 @@ is currently supported on most popular platforms. Here is an overview:
http.server.rst
http.cookies.rst
http.cookiejar.rst
+ xmlrpc.rst
xmlrpc.client.rst
xmlrpc.server.rst
+ ipaddress.rst
diff --git a/Doc/library/io.rst b/Doc/library/io.rst
index 62eaf6d..e83e55c 100644
--- a/Doc/library/io.rst
+++ b/Doc/library/io.rst
@@ -37,6 +37,10 @@ giving a :class:`str` object to the ``write()`` method of a binary stream
will raise a ``TypeError``. So will giving a :class:`bytes` object to the
``write()`` method of a text stream.
+.. versionchanged:: 3.3
+ Operations that used to raise :exc:`IOError` now raise :exc:`OSError`, since
+ :exc:`IOError` is now an alias of :exc:`OSError`.
+
Text I/O
^^^^^^^^
@@ -55,7 +59,7 @@ In-memory text streams are also available as :class:`StringIO` objects::
f = io.StringIO("some initial text data")
-The text stream API is described in detail in the documentation for the
+The text stream API is described in detail in the documentation of
:class:`TextIOBase`.
@@ -113,21 +117,13 @@ High-level Module Interface
.. exception:: BlockingIOError
- Error raised when blocking would occur on a non-blocking stream. It inherits
- :exc:`IOError`.
-
- In addition to those of :exc:`IOError`, :exc:`BlockingIOError` has one
- attribute:
-
- .. attribute:: characters_written
-
- An integer containing the number of characters written to the stream
- before it blocked.
+ This is a compatibility alias for the builtin :exc:`BlockingIOError`
+ exception.
.. exception:: UnsupportedOperation
- An exception inheriting :exc:`IOError` and :exc:`ValueError` that is raised
+ An exception inheriting :exc:`OSError` and :exc:`ValueError` that is raised
when an unsupported operation is called on a stream.
@@ -206,8 +202,8 @@ I/O Base Classes
Even though :class:`IOBase` does not declare :meth:`read`, :meth:`readinto`,
or :meth:`write` because their signatures will vary, implementations and
clients should consider those methods part of the interface. Also,
- implementations may raise a :exc:`IOError` when operations they do not
- support are called.
+ implementations may raise a :exc:`ValueError` (or :exc:`UnsupportedOperation`)
+ when operations they do not support are called.
The basic type used for binary data read from or written to a file is
:class:`bytes`. :class:`bytearray`\s are accepted too, and in some cases
@@ -215,15 +211,15 @@ I/O Base Classes
:class:`str` data.
Note that calling any method (even inquiries) on a closed stream is
- undefined. Implementations may raise :exc:`IOError` in this case.
+ undefined. Implementations may raise :exc:`ValueError` in this case.
- IOBase (and its subclasses) support the iterator protocol, meaning that an
- :class:`IOBase` object can be iterated over yielding the lines in a stream.
- Lines are defined slightly differently depending on whether the stream is
- a binary stream (yielding bytes), or a text stream (yielding character
- strings). See :meth:`~IOBase.readline` below.
+ :class:`IOBase` (and its subclasses) support the iterator protocol, meaning
+ that an :class:`IOBase` object can be iterated over yielding the lines in a
+ stream. Lines are defined slightly differently depending on whether the
+ stream is a binary stream (yielding bytes), or a text stream (yielding
+ character strings). See :meth:`~IOBase.readline` below.
- IOBase is also a context manager and therefore supports the
+ :class:`IOBase` is also a context manager and therefore supports the
:keyword:`with` statement. In this example, *file* is closed after the
:keyword:`with` statement's suite is finished---even if an exception occurs::
@@ -243,12 +239,12 @@ I/O Base Classes
.. attribute:: closed
- True if the stream is closed.
+ ``True`` if the stream is closed.
.. method:: fileno()
Return the underlying file descriptor (an integer) of the stream if it
- exists. An :exc:`IOError` is raised if the IO object does not use a file
+ exists. An :exc:`OSError` is raised if the IO object does not use a file
descriptor.
.. method:: flush()
@@ -264,7 +260,7 @@ I/O Base Classes
.. method:: readable()
Return ``True`` if the stream can be read from. If False, :meth:`read`
- will raise :exc:`IOError`.
+ will raise :exc:`OSError`.
.. method:: readline(limit=-1)
@@ -299,10 +295,15 @@ I/O Base Classes
.. versionadded:: 3.1
The ``SEEK_*`` constants.
+ .. versionadded:: 3.3
+ Some operating systems could support additional values, like
+ :data:`os.SEEK_HOLE` or :data:`os.SEEK_DATA`. The valid values
+ for a file could depend on it being open in text or binary mode.
+
.. method:: seekable()
Return ``True`` if the stream supports random access. If ``False``,
- :meth:`seek`, :meth:`tell` and :meth:`truncate` will raise :exc:`IOError`.
+ :meth:`seek`, :meth:`tell` and :meth:`truncate` will raise :exc:`OSError`.
.. method:: tell()
@@ -320,7 +321,7 @@ I/O Base Classes
.. method:: writable()
Return ``True`` if the stream supports writing. If ``False``,
- :meth:`write` and :meth:`truncate` will raise :exc:`IOError`.
+ :meth:`write` and :meth:`truncate` will raise :exc:`OSError`.
.. method:: writelines(lines)
@@ -339,7 +340,7 @@ I/O Base Classes
(this is left to Buffered I/O and Text I/O, described later in this page).
In addition to the attributes and methods from :class:`IOBase`,
- RawIOBase provides the following methods:
+ :class:`RawIOBase` provides the following methods:
.. method:: read(n=-1)
@@ -359,18 +360,18 @@ I/O Base Classes
.. method:: readinto(b)
- Read up to len(b) bytes into bytearray *b* and return the number
- of bytes read. If the object is in non-blocking mode and no
+ Read up to ``len(b)`` bytes into :class:`bytearray` *b* and return the
+ number of bytes read. If the object is in non-blocking mode and no
bytes are available, ``None`` is returned.
.. method:: write(b)
- Write the given bytes or bytearray object, *b*, to the underlying raw
- stream and return the number of bytes written. This can be less than
- ``len(b)``, depending on specifics of the underlying raw stream, and
- especially if it is in non-blocking mode. ``None`` is returned if the
- raw stream is set not to block and no single byte could be readily
- written to it.
+ Write the given :class:`bytes` or :class:`bytearray` object, *b*, to the
+ underlying raw stream and return the number of bytes written. This can
+ be less than ``len(b)``, depending on specifics of the underlying raw
+ stream, and especially if it is in non-blocking mode. ``None`` is
+ returned if the raw stream is set not to block and no single byte could
+ be readily written to it.
.. class:: BufferedIOBase
@@ -420,8 +421,8 @@ I/O Base Classes
.. method:: read(n=-1)
Read and return up to *n* bytes. If the argument is omitted, ``None``, or
- negative, data is read and returned until EOF is reached. An empty bytes
- object is returned if the stream is already at EOF.
+ negative, data is read and returned until EOF is reached. An empty
+ :class:`bytes` object is returned if the stream is already at EOF.
If the argument is positive, and the underlying raw stream is not
interactive, multiple raw reads may be issued to satisfy the byte count
@@ -441,22 +442,23 @@ I/O Base Classes
.. method:: readinto(b)
- Read up to len(b) bytes into bytearray *b* and return the number of bytes
- read.
+ Read up to ``len(b)`` bytes into bytearray *b* and return the number of
+ bytes read.
Like :meth:`read`, multiple reads may be issued to the underlying raw
- stream, unless the latter is 'interactive'.
+ stream, unless the latter is interactive.
A :exc:`BlockingIOError` is raised if the underlying raw stream is in
non blocking-mode, and has no data available at the moment.
.. method:: write(b)
- Write the given bytes or bytearray object, *b* and return the number
- of bytes written (never less than ``len(b)``, since if the write fails
- an :exc:`IOError` will be raised). Depending on the actual
- implementation, these bytes may be readily written to the underlying
- stream, or held in a buffer for performance and latency reasons.
+ Write the given :class:`bytes` or :class:`bytearray` object, *b* and
+ return the number of bytes written (never less than ``len(b)``, since if
+ the write fails an :exc:`OSError` will be raised). Depending on the
+ actual implementation, these bytes may be readily written to the
+ underlying stream, or held in a buffer for performance and latency
+ reasons.
When in non-blocking mode, a :exc:`BlockingIOError` is raised if the
data needed to be written to the raw stream but it couldn't accept
@@ -466,7 +468,7 @@ I/O Base Classes
Raw File I/O
^^^^^^^^^^^^
-.. class:: FileIO(name, mode='r', closefd=True)
+.. class:: FileIO(name, mode='r', closefd=True, opener=None)
:class:`FileIO` represents an OS-level file containing bytes data.
It implements the :class:`RawIOBase` interface (and therefore the
@@ -474,22 +476,35 @@ Raw File I/O
The *name* can be one of two things:
- * a character string or bytes object representing the path to the file
- which will be opened;
+ * a character string or :class:`bytes` object representing the path to the
+ file which will be opened;
* an integer representing the number of an existing OS-level file descriptor
to which the resulting :class:`FileIO` object will give access.
- The *mode* can be ``'r'``, ``'w'`` or ``'a'`` for reading (default), writing,
- or appending. The file will be created if it doesn't exist when opened for
- writing or appending; it will be truncated when opened for writing. Add a
+ The *mode* can be ``'r'``, ``'w'``, ``'x'`` or ``'a'`` for reading
+ (default), writing, exclusive creation or appending. The file will be
+ created if it doesn't exist when opened for writing or appending; it will be
+ truncated when opened for writing. :exc:`FileExistsError` will be raised if
+ it already exists when opened for creating. Opening a file for creating
+ implies writing, so this mode behaves in a similar way to ``'w'``. Add a
``'+'`` to the mode to allow simultaneous reading and writing.
The :meth:`read` (when called with a positive argument), :meth:`readinto`
and :meth:`write` methods on this class will only make one system call.
+ A custom opener can be used by passing a callable as *opener*. The underlying
+ file descriptor for the file object is then obtained by calling *opener* with
+ (*name*, *flags*). *opener* must return an open file descriptor (passing
+ :func:`os.open` as *opener* results in functionality similar to passing
+ ``None``).
+
+ .. versionchanged:: 3.3
+ The *opener* parameter was added.
+ The ``'x'`` mode was added.
+
In addition to the attributes and methods from :class:`IOBase` and
:class:`RawIOBase`, :class:`FileIO` provides the following data
- attributes and methods:
+ attributes:
.. attribute:: mode
@@ -537,7 +552,7 @@ than raw I/O does.
.. method:: getvalue()
- Return ``bytes`` containing the entire contents of the buffer.
+ Return :class:`bytes` containing the entire contents of the buffer.
.. method:: read1()
@@ -581,7 +596,7 @@ than raw I/O does.
A buffer providing higher-level access to a writeable, sequential
:class:`RawIOBase` object. It inherits :class:`BufferedIOBase`.
- When writing to this object, data is normally held into an internal
+ When writing to this object, data is normally placed into an internal
buffer. The buffer will be written out to the underlying :class:`RawIOBase`
object under various conditions, including:
@@ -594,8 +609,6 @@ than raw I/O does.
*raw* stream. If the *buffer_size* is not given, it defaults to
:data:`DEFAULT_BUFFER_SIZE`.
- A third argument, *max_buffer_size*, is supported, but unused and deprecated.
-
:class:`BufferedWriter` provides or overrides these methods in addition to
those from :class:`BufferedIOBase` and :class:`IOBase`:
@@ -606,9 +619,10 @@ than raw I/O does.
.. method:: write(b)
- Write the bytes or bytearray object, *b* and return the number of bytes
- written. When in non-blocking mode, a :exc:`BlockingIOError` is raised
- if the buffer needs to be written out but the raw stream blocks.
+ Write the :class:`bytes` or :class:`bytearray` object, *b* and return the
+ number of bytes written. When in non-blocking mode, a
+ :exc:`BlockingIOError` is raised if the buffer needs to be written out but
+ the raw stream blocks.
.. class:: BufferedRandom(raw, buffer_size=DEFAULT_BUFFER_SIZE)
@@ -621,8 +635,6 @@ than raw I/O does.
in the first argument. If the *buffer_size* is omitted it defaults to
:data:`DEFAULT_BUFFER_SIZE`.
- A third argument, *max_buffer_size*, is supported, but unused and deprecated.
-
:class:`BufferedRandom` is capable of anything :class:`BufferedReader` or
:class:`BufferedWriter` can do.
@@ -637,9 +649,6 @@ than raw I/O does.
writeable respectively. If the *buffer_size* is omitted it defaults to
:data:`DEFAULT_BUFFER_SIZE`.
- A fourth argument, *max_buffer_size*, is supported, but unused and
- deprecated.
-
:class:`BufferedRWPair` implements all of :class:`BufferedIOBase`\'s methods
except for :meth:`~BufferedIOBase.detach`, which raises
:exc:`UnsupportedOperation`.
@@ -682,7 +691,7 @@ Text I/O
The underlying binary buffer (a :class:`BufferedIOBase` instance) that
:class:`TextIOBase` deals with. This is not part of the
- :class:`TextIOBase` API and may not exist on some implementations.
+ :class:`TextIOBase` API and may not exist in some implementations.
.. method:: detach()
@@ -742,13 +751,15 @@ Text I/O
written.
-.. class:: TextIOWrapper(buffer, encoding=None, errors=None, newline=None, line_buffering=False)
+.. class:: TextIOWrapper(buffer, encoding=None, errors=None, newline=None, \
+ line_buffering=False, write_through=False)
A buffered text stream over a :class:`BufferedIOBase` binary stream.
It inherits :class:`TextIOBase`.
*encoding* gives the name of the encoding that the stream will be decoded or
- encoded with. It defaults to :func:`locale.getpreferredencoding`.
+ encoded with. It defaults to
+ :func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`.
*errors* is an optional string that specifies how encoding and decoding
errors are to be handled. Pass ``'strict'`` to raise a :exc:`ValueError`
@@ -785,6 +796,19 @@ Text I/O
If *line_buffering* is ``True``, :meth:`flush` is implied when a call to
write contains a newline character.
+ If *write_through* is ``True``, calls to :meth:`write` are guaranteed
+ not to be buffered: any data written on the :class:`TextIOWrapper`
+ object is immediately handed to its underlying binary *buffer*.
+
+ .. versionchanged:: 3.3
+ The *write_through* argument has been added.
+
+ .. versionchanged:: 3.3
+ The default *encoding* is now ``locale.getpreferredencoding(False)``
+ instead of ``locale.getpreferredencoding()``. The locale encoding is no
+ longer changed temporarily using :func:`locale.setlocale`; the current
+ locale encoding is used instead of the user preferred encoding.
+
:class:`TextIOWrapper` provides one attribute in addition to those of
:class:`TextIOBase` and its parents:
@@ -851,8 +875,8 @@ operating system's unbuffered I/O routines. The gain depends on the OS and the
kind of I/O which is performed. For example, on some modern OSes such as Linux,
unbuffered disk I/O can be as fast as buffered I/O. The bottom line, however,
is that buffered I/O offers predictable performance regardless of the platform
-and the backing device. Therefore, it is most always preferable to use buffered
-I/O rather than unbuffered I/O for binary datal
+and the backing device. Therefore, it is almost always preferable to use
+buffered I/O rather than unbuffered I/O for binary data.
Text I/O
^^^^^^^^
@@ -887,8 +911,8 @@ Binary buffered objects (instances of :class:`BufferedReader`,
:class:`BufferedWriter`, :class:`BufferedRandom` and :class:`BufferedRWPair`)
are not reentrant. While reentrant calls will not happen in normal situations,
they can arise from doing I/O in a :mod:`signal` handler. If a thread tries to
-renter a buffered object which it is already accessing, a :exc:`RuntimeError` is
-raised. Note this doesn't prohibit a different thread from entering the
+re-enter a buffered object which it is already accessing, a :exc:`RuntimeError`
+is raised. Note this doesn't prohibit a different thread from entering the
buffered object.
The above implicitly extends to text files, since the :func:`open()` function
diff --git a/Doc/library/ipaddress.rst b/Doc/library/ipaddress.rst
new file mode 100644
index 0000000..86d84af
--- /dev/null
+++ b/Doc/library/ipaddress.rst
@@ -0,0 +1,804 @@
+:mod:`ipaddress` --- IPv4/IPv6 manipulation library
+===================================================
+
+.. module:: ipaddress
+ :synopsis: IPv4/IPv6 manipulation library.
+.. moduleauthor:: Peter Moody
+
+**Source code:** :source:`Lib/ipaddress.py`
+
+--------------
+
+.. note::
+
+ The ``ipaddress`` module has been included in the standard library on a
+ :term:`provisional basis <provisional package>`. Backwards incompatible
+ changes (up to and including removal of the package) may occur if deemed
+ necessary by the core developers.
+
+:mod:`ipaddress` provides the capabilities to create, manipulate and
+operate on IPv4 and IPv6 addresses and networks.
+
+The functions and classes in this module make it straightforward to handle
+various tasks related to IP addresses, including checking whether or not two
+hosts are on the same subnet, iterating over all hosts in a particular
+subnet, checking whether or not a string represents a valid IP address or
+network definition, and so on.
+
+This is the full module API reference - for an overview and introduction,
+see :ref:`ipaddress-howto`.
+
+.. versionadded:: 3.3
+
+
+Convenience factory functions
+-----------------------------
+
+The :mod:`ipaddress` module provides factory functions to conveniently create
+IP addresses, networks and interfaces:
+
+.. function:: ip_address(address)
+
+ Return an :class:`IPv4Address` or :class:`IPv6Address` object depending on
+ the IP address passed as argument. Either IPv4 or IPv6 addresses may be
+ supplied; integers less than 2**32 will be considered to be IPv4 by default.
+ A :exc:`ValueError` is raised if *address* does not represent a valid IPv4
+ or IPv6 address.
+
+.. testsetup::
+ >>> import ipaddress
+ >>> from ipaddress import (ip_network, IPv4Address, IPv4Interface,
+ ... IPv4Network)
+
+::
+
+ >>> ipaddress.ip_address('192.168.0.1')
+ IPv4Address('192.168.0.1')
+ >>> ipaddress.ip_address('2001:db8::')
+ IPv6Address('2001:db8::')
+
+
+.. function:: ip_network(address, strict=True)
+
+ Return an :class:`IPv4Network` or :class:`IPv6Network` object depending on
+ the IP address passed as argument. *address* is a string or integer
+ representing the IP network. Either IPv4 or IPv6 networks may be supplied;
+ integers less than 2**32 will be considered to be IPv4 by default. *strict*
+ is passed to :class:`IPv4Network` or :class:`IPv6Network` constructor. A
+ :exc:`ValueError` is raised if *address* does not represent a valid IPv4 or
+ IPv6 address, or if the network has host bits set.
+
+ >>> ipaddress.ip_network('192.168.0.0/28')
+ IPv4Network('192.168.0.0/28')
+
+
+.. function:: ip_interface(address)
+
+ Return an :class:`IPv4Interface` or :class:`IPv6Interface` object depending
+ on the IP address passed as argument. *address* is a string or integer
+ representing the IP address. Either IPv4 or IPv6 addresses may be supplied;
+ integers less than 2**32 will be considered to be IPv4 by default. A
+ :exc:`ValueError` is raised if *address* does not represent a valid IPv4 or
+ IPv6 address.
+
+One downside of these convenience functions is that the need to handle both
+IPv4 and IPv6 formats means that error messages provide minimal
+information on the precise error, as the functions don't know whether the
+IPv4 or IPv6 format was intended. More detailed error reporting can be
+obtained by calling the appropriate version-specific class constructors
+directly.
+
+
+IP Addresses
+------------
+
+Address objects
+^^^^^^^^^^^^^^^
+
+The :class:`IPv4Address` and :class:`IPv6Address` objects share a lot of common
+attributes. Some attributes that are only meaningful for IPv6 addresses are
+also implemented by :class:`IPv4Address` objects, in order to make it easier to
+write code that handles both IP versions correctly.
+
+.. class:: IPv4Address(address)
+
+ Construct an IPv4 address. An :exc:`AddressValueError` is raised if
+ *address* is not a valid IPv4 address.
+
+ The following constitutes a valid IPv4 address:
+
+ 1. A string in decimal-dot notation, consisting of four decimal integers in
+ the inclusive range 0-255, separated by dots (e.g. ``192.168.0.1``). Each
+ integer represents an octet (byte) in the address. Leading zeroes are
+ tolerated only for values less than 8 (as there is no ambiguity
+ between the decimal and octal interpretations of such strings).
+ 2. An integer that fits into 32 bits.
+ 3. An integer packed into a :class:`bytes` object of length 4 (most
+ significant octet first).
+
+ >>> ipaddress.IPv4Address('192.168.0.1')
+ IPv4Address('192.168.0.1')
+ >>> ipaddress.IPv4Address(3232235521)
+ IPv4Address('192.168.0.1')
+ >>> ipaddress.IPv4Address(b'\xC0\xA8\x00\x01')
+ IPv4Address('192.168.0.1')
+
+ .. attribute:: version
+
+ The appropriate version number: ``4`` for IPv4, ``6`` for IPv6.
+
+ .. attribute:: max_prefixlen
+
+ The total number of bits in the address representation for this
+ version: ``32`` for IPv4, ``128`` for IPv6.
+
+ The prefix defines the number of leading bits in an address that
+ are compared to determine whether or not an address is part of a
+ network.
+
+ .. attribute:: compressed
+ .. attribute:: exploded
+
+ The string representation in dotted decimal notation. Leading zeroes
+ are never included in the representation.
+
+ As IPv4 does not define a shorthand notation for addresses with octets
+ set to zero, these two attributes are always the same as ``str(addr)``
+ for IPv4 addresses. Exposing these attributes makes it easier to
+ write display code that can handle both IPv4 and IPv6 addresses.
+
+ .. attribute:: packed
+
+ The binary representation of this address - a :class:`bytes` object of
+ the appropriate length (most significant octet first). This is 4 bytes
+ for IPv4 and 16 bytes for IPv6.
+
+ .. attribute:: is_multicast
+
+ ``True`` if the address is reserved for multicast use. See
+ :RFC:`3171` (for IPv4) or :RFC:`2373` (for IPv6).
+
+ .. attribute:: is_private
+
+ ``True`` if the address is allocated for private networks. See
+ :RFC:`1918` (for IPv4) or :RFC:`4193` (for IPv6).
+
+ .. attribute:: is_unspecified
+
+ ``True`` if the address is unspecified. See :RFC:`5735` (for IPv4)
+ or :RFC:`2373` (for IPv6).
+
+ .. attribute:: is_reserved
+
+ ``True`` if the address is otherwise IETF reserved.
+
+ .. attribute:: is_loopback
+
+ ``True`` if this is a loopback address. See :RFC:`3330` (for IPv4)
+ or :RFC:`2373` (for IPv6).
+
+ .. attribute:: is_link_local
+
+ ``True`` if the address is reserved for link-local usage. See
+ :RFC:`3927`.
+
+
+.. class:: IPv6Address(address)
+
+ Construct an IPv6 address. An :exc:`AddressValueError` is raised if
+ *address* is not a valid IPv6 address.
+
+ The following constitutes a valid IPv6 address:
+
+ 1. A string consisting of eight groups of four hexadecimal digits, each
+ group representing 16 bits. The groups are separated by colons.
+ This describes an *exploded* (longhand) notation. The string can
+ also be *compressed* (shorthand notation) by various means. See
+ :RFC:`4291` for details. For example,
+ ``"0000:0000:0000:0000:0000:0abc:0007:0def"`` can be compressed to
+ ``"::abc:7:def"``.
+ 2. An integer that fits into 128 bits.
+ 3. An integer packed into a :class:`bytes` object of length 16, big-endian.
+
+ >>> ipaddress.IPv6Address('2001:db8::1000')
+ IPv6Address('2001:db8::1000')
+
+ .. attribute:: compressed
+
+ The short form of the address representation, with leading zeroes in
+ groups omitted and the longest sequence of groups consisting entirely of
+ zeroes collapsed to a single empty group.
+
+ This is also the value returned by ``str(addr)`` for IPv6 addresses.
+
+ .. attribute:: exploded
+
+ The long form of the address representation, with all leading zeroes and
+ groups consisting entirely of zeroes included.
+
+ .. attribute:: packed
+ .. attribute:: version
+ .. attribute:: max_prefixlen
+ .. attribute:: is_multicast
+ .. attribute:: is_private
+ .. attribute:: is_unspecified
+ .. attribute:: is_reserved
+ .. attribute:: is_loopback
+ .. attribute:: is_link_local
+
+ Refer to the corresponding attribute documentation in
+ :class:`IPv4Address`.
+
+ .. attribute:: is_site_local
+
+ ``True`` if the address is reserved for site-local usage. Note that
+ the site-local address space has been deprecated by :RFC:`3879`. Use
+ :attr:`~IPv4Address.is_private` to test if this address is in the
+ space of unique local addresses as defined by :RFC:`4193`.
+
+ .. attribute:: ipv4_mapped
+
+ For addresses that appear to be IPv4 mapped addresses (starting with
+ ``::FFFF/96``), this property will report the embedded IPv4 address.
+ For any other address, this property will be ``None``.
+
+ .. attribute:: sixtofour
+
+ For addresses that appear to be 6to4 addresses (starting with
+ ``2002::/16``) as defined by :RFC:`3056`, this property will report
+ the embedded IPv4 address. For any other address, this property will
+ be ``None``.
+
+ .. attribute:: teredo
+
+ For addresses that appear to be Teredo addresses (starting with
+ ``2001::/32``) as defined by :RFC:`4380`, this property will report
+ the embedded ``(server, client)`` IP address pair. For any other
+ address, this property will be ``None``.
+
+
+Conversion to Strings and Integers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To interoperate with networking interfaces such as the socket module,
+addresses must be converted to strings or integers. This is handled using
+the :func:`str` and :func:`int` builtin functions::
+
+ >>> str(ipaddress.IPv4Address('192.168.0.1'))
+ '192.168.0.1'
+ >>> int(ipaddress.IPv4Address('192.168.0.1'))
+ 3232235521
+ >>> str(ipaddress.IPv6Address('::1'))
+ '::1'
+ >>> int(ipaddress.IPv6Address('::1'))
+ 1
+
+
+Operators
+^^^^^^^^^
+
+Address objects support some operators. Unless stated otherwise, operators can
+only be applied between compatible objects (i.e. IPv4 with IPv4, IPv6 with
+IPv6).
+
+
+Logical operators
+"""""""""""""""""
+
+Address objects can be compared with the usual set of logical operators. Some
+examples::
+
+ >>> IPv4Address('127.0.0.2') > IPv4Address('127.0.0.1')
+ True
+ >>> IPv4Address('127.0.0.2') == IPv4Address('127.0.0.1')
+ False
+ >>> IPv4Address('127.0.0.2') != IPv4Address('127.0.0.1')
+ True
+
+
+Arithmetic operators
+""""""""""""""""""""
+
+Integers can be added to or subtracted from address objects. Some examples::
+
+ >>> IPv4Address('127.0.0.2') + 3
+ IPv4Address('127.0.0.5')
+ >>> IPv4Address('127.0.0.2') - 3
+ IPv4Address('126.255.255.255')
+ >>> IPv4Address('255.255.255.255') + 1
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ ipaddress.AddressValueError: 4294967296 (>= 2**32) is not permitted as an IPv4 address
+
+
+IP Network definitions
+----------------------
+
+The :class:`IPv4Network` and :class:`IPv6Network` objects provide a mechanism
+for defining and inspecting IP network definitions. A network definition
+consists of a *mask* and a *network address*, and as such defines a range of
+IP addresses that equal the network address when masked (binary AND) with the
+mask. For example, a network definition with the mask ``255.255.255.0`` and
+the network address ``192.168.1.0`` consists of IP addresses in the inclusive
+range ``192.168.1.0`` to ``192.168.1.255``.
+
+
+Prefix, net mask and host mask
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+There are several equivalent ways to specify IP network masks. A *prefix*
+``/<nbits>`` is a notation that denotes how many high-order bits are set in
+the network mask. A *net mask* is an IP address with some number of
+high-order bits set. Thus the prefix ``/24`` is equivalent to the net mask
+``255.255.255.0`` in IPv4, or ``ffff:ff00::`` in IPv6. In addition, a
+*host mask* is the logical inverse of a *net mask*, and is sometimes used
+(for example in Cisco access control lists) to denote a network mask. The
+host mask equivalent to ``/24`` in IPv4 is ``0.0.0.255``.
+
+
+Network objects
+^^^^^^^^^^^^^^^
+
+All attributes implemented by address objects are implemented by network
+objects as well. In addition, network objects implement additional attributes.
+All of these are common between :class:`IPv4Network` and :class:`IPv6Network`,
+so to avoid duplication they are only documented for :class:`IPv4Network`.
+
+.. class:: IPv4Network(address, strict=True)
+
+ Construct an IPv4 network definition. *address* can be one of the following:
+
+ 1. A string consisting of an IP address and an optional mask, separated by
+ a slash (``/``). The IP address is the network address, and the mask
+ can be either a single number, which means it's a *prefix*, or a string
+ representation of an IPv4 address. If it's the latter, the mask is
+ interpreted as a *net mask* if it starts with a non-zero field, or as
+ a *host mask* if it starts with a zero field. If no mask is provided,
+ it's considered to be ``/32``.
+
+ For example, the following *address* specifications are equivalent:
+ ``192.168.1.0/24``, ``192.168.1.0/255.255.255.0`` and
+ ``192.168.1.0/0.0.0.255``.
+
+ 2. An integer that fits into 32 bits. This is equivalent to a
+ single-address network, with the network address being *address* and
+ the mask being ``/32``.
+
+ 3. An integer packed into a :class:`bytes` object of length 4, big-endian.
+ The interpretation is similar to an integer *address*.
+
+ An :exc:`AddressValueError` is raised if *address* is not a valid IPv4
+ address. A :exc:`NetmaskValueError` is raised if the mask is not valid for
+ an IPv4 address.
+
+ If *strict* is ``True`` and host bits are set in the supplied address,
+ then :exc:`ValueError` is raised. Otherwise, the host bits are masked out
+ to determine the appropriate network address.
+
+ Unless stated otherwise, all network methods accepting other network/address
+ objects will raise :exc:`TypeError` if the argument's IP version is
+ incompatible with ``self``.
+
+ .. attribute:: version
+ .. attribute:: max_prefixlen
+
+ Refer to the corresponding attribute documentation in
+ :class:`IPv4Address`.
+
+ .. attribute:: is_multicast
+ .. attribute:: is_private
+ .. attribute:: is_unspecified
+ .. attribute:: is_reserved
+ .. attribute:: is_loopback
+ .. attribute:: is_link_local
+
+ These attributes are true for the network as a whole if they are true
+ for both the network address and the broadcast address.
+
+ .. attribute:: network_address
+
+ The network address for the network. The network address and the
+ prefix length together uniquely define a network.
+
+ .. attribute:: broadcast_address
+
+ The broadcast address for the network. Packets sent to the broadcast
+ address should be received by every host on the network.
+
+ .. attribute:: hostmask
+
+ The host mask, as a string.
+
+ .. attribute:: with_prefixlen
+ .. attribute:: compressed
+ .. attribute:: exploded
+
+ A string representation of the network, with the mask in prefix
+ notation.
+
+ ``with_prefixlen`` and ``compressed`` are always the same as
+ ``str(network)``.
+ ``exploded`` uses the exploded form of the network address.
+
+ .. attribute:: with_netmask
+
+ A string representation of the network, with the mask in net mask
+ notation.
+
+ .. attribute:: with_hostmask
+
+ A string representation of the network, with the mask in host mask
+ notation.
+
+ .. attribute:: num_addresses
+
+ The total number of addresses in the network.
+
+ .. attribute:: prefixlen
+
+ Length of the network prefix, in bits.
+
+ .. method:: hosts()
+
+ Returns an iterator over the usable hosts in the network. The usable
+ hosts are all the IP addresses that belong to the network, except the
+ network address itself and the network broadcast address.
+
+ >>> list(ip_network('192.0.2.0/29').hosts()) #doctest: +NORMALIZE_WHITESPACE
+ [IPv4Address('192.0.2.1'), IPv4Address('192.0.2.2'),
+ IPv4Address('192.0.2.3'), IPv4Address('192.0.2.4'),
+ IPv4Address('192.0.2.5'), IPv4Address('192.0.2.6')]
+
+ .. method:: overlaps(other)
+
+ ``True`` if this network is partly or wholly contained in *other* or
+ *other* is wholly contained in this network.
+
+ .. method:: address_exclude(network)
+
+ Computes the network definitions resulting from removing the given
+ *network* from this one. Returns an iterator of network objects.
+ Raises :exc:`ValueError` if *network* is not completely contained in
+ this network.
+
+ >>> n1 = ip_network('192.0.2.0/28')
+ >>> n2 = ip_network('192.0.2.1/32')
+ >>> list(n1.address_exclude(n2)) #doctest: +NORMALIZE_WHITESPACE
+ [IPv4Network('192.0.2.8/29'), IPv4Network('192.0.2.4/30'),
+ IPv4Network('192.0.2.2/31'), IPv4Network('192.0.2.0/32')]
+
+ .. method:: subnets(prefixlen_diff=1, new_prefix=None)
+
+ The subnets that join to make the current network definition, depending
+ on the argument values. *prefixlen_diff* is the amount our prefix
+ length should be increased by. *new_prefix* is the desired new
+ prefix of the subnets; it must be larger than our prefix. One and
+ only one of *prefixlen_diff* and *new_prefix* must be set. Returns an
+ iterator of network objects.
+
+ >>> list(ip_network('192.0.2.0/24').subnets())
+ [IPv4Network('192.0.2.0/25'), IPv4Network('192.0.2.128/25')]
+ >>> list(ip_network('192.0.2.0/24').subnets(prefixlen_diff=2)) #doctest: +NORMALIZE_WHITESPACE
+ [IPv4Network('192.0.2.0/26'), IPv4Network('192.0.2.64/26'),
+ IPv4Network('192.0.2.128/26'), IPv4Network('192.0.2.192/26')]
+ >>> list(ip_network('192.0.2.0/24').subnets(new_prefix=26)) #doctest: +NORMALIZE_WHITESPACE
+ [IPv4Network('192.0.2.0/26'), IPv4Network('192.0.2.64/26'),
+ IPv4Network('192.0.2.128/26'), IPv4Network('192.0.2.192/26')]
+ >>> list(ip_network('192.0.2.0/24').subnets(new_prefix=23))
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ raise ValueError('new prefix must be longer')
+ ValueError: new prefix must be longer
+ >>> list(ip_network('192.0.2.0/24').subnets(new_prefix=25))
+ [IPv4Network('192.0.2.0/25'), IPv4Network('192.0.2.128/25')]
+
+ .. method:: supernet(prefixlen_diff=1, new_prefix=None)
+
+ The supernet containing this network definition, depending on the
+ argument values. *prefixlen_diff* is the amount our prefix length
+ should be decreased by. *new_prefix* is the desired new prefix of
+ the supernet; it must be smaller than our prefix. One and only one
+ of *prefixlen_diff* and *new_prefix* must be set. Returns a single
+ network object.
+
+ >>> ip_network('192.0.2.0/24').supernet()
+ IPv4Network('192.0.2.0/23')
+ >>> ip_network('192.0.2.0/24').supernet(prefixlen_diff=2)
+ IPv4Network('192.0.0.0/22')
+ >>> ip_network('192.0.2.0/24').supernet(new_prefix=20)
+ IPv4Network('192.0.0.0/20')
+
+ .. method:: compare_networks(other)
+
+ Compare this network to *other*. In this comparison only the network
+ addresses are considered; host bits aren't. Returns either ``-1``,
+ ``0`` or ``1``.
+
+ >>> ip_network('192.0.2.1/32').compare_networks(ip_network('192.0.2.2/32'))
+ -1
+ >>> ip_network('192.0.2.1/32').compare_networks(ip_network('192.0.2.0/32'))
+ 1
+ >>> ip_network('192.0.2.1/32').compare_networks(ip_network('192.0.2.1/32'))
+ 0
+
+
+.. class:: IPv6Network(address, strict=True)
+
+ Construct an IPv6 network definition. *address* can be one of the following:
+
+ 1. A string consisting of an IP address and an optional mask, separated by
+ a slash (``/``). The IP address is the network address, and the mask
+ can be either a single number, which means it's a *prefix*, or a string
+ representation of an IPv6 address. If it's the latter, the mask is
+ interpreted as a *net mask*. If no mask is provided, it's considered to
+ be ``/128``.
+
+ For example, the following *address* specifications are equivalent:
+ ``2001:db00::0/24`` and ``2001:db00::0/ffff:ff00::``.
+
+ 2. An integer that fits into 128 bits. This is equivalent to a
+ single-address network, with the network address being *address* and
+ the mask being ``/128``.
+
+ 3. An integer packed into a :class:`bytes` object of length 16, big-endian.
+ The interpretation is similar to an integer *address*.
+
+ An :exc:`AddressValueError` is raised if *address* is not a valid IPv6
+ address. A :exc:`NetmaskValueError` is raised if the mask is not valid for
+ an IPv6 address.
+
+ If *strict* is ``True`` and host bits are set in the supplied address,
+ then :exc:`ValueError` is raised. Otherwise, the host bits are masked out
+ to determine the appropriate network address.
+
+ .. attribute:: version
+ .. attribute:: max_prefixlen
+ .. attribute:: is_multicast
+ .. attribute:: is_private
+ .. attribute:: is_unspecified
+ .. attribute:: is_reserved
+ .. attribute:: is_loopback
+ .. attribute:: is_link_local
+ .. attribute:: network_address
+ .. attribute:: broadcast_address
+ .. attribute:: hostmask
+ .. attribute:: with_prefixlen
+ .. attribute:: compressed
+ .. attribute:: exploded
+ .. attribute:: with_netmask
+ .. attribute:: with_hostmask
+ .. attribute:: num_addresses
+ .. attribute:: prefixlen
+ .. method:: hosts()
+ .. method:: overlaps(other)
+ .. method:: address_exclude(network)
+ .. method:: subnets(prefixlen_diff=1, new_prefix=None)
+ .. method:: supernet(prefixlen_diff=1, new_prefix=None)
+ .. method:: compare_networks(other)
+
+ Refer to the corresponding attribute documentation in
+ :class:`IPv4Network`.
+
+ .. attribute:: is_site_local
+
+ This attribute is true for the network as a whole if it is true
+ for both the network address and the broadcast address.
+
+
+Operators
+^^^^^^^^^
+
+Network objects support some operators. Unless stated otherwise, operators can
+only be applied between compatible objects (i.e. IPv4 with IPv4, IPv6 with
+IPv6).
+
+
+Logical operators
+"""""""""""""""""
+
+Network objects can be compared with the usual set of logical operators,
+similarly to address objects.
+
+
+Iteration
+"""""""""
+
+Network objects can be iterated to list all the addresses belonging to the
+network. For iteration, *all* hosts are returned, including unusable hosts
+(for usable hosts, use the :meth:`~IPv4Network.hosts` method). An
+example::
+
+ >>> for addr in IPv4Network('192.0.2.0/28'):
+ ... addr
+ ...
+ IPv4Address('192.0.2.0')
+ IPv4Address('192.0.2.1')
+ IPv4Address('192.0.2.2')
+ IPv4Address('192.0.2.3')
+ IPv4Address('192.0.2.4')
+ IPv4Address('192.0.2.5')
+ IPv4Address('192.0.2.6')
+ IPv4Address('192.0.2.7')
+ IPv4Address('192.0.2.8')
+ IPv4Address('192.0.2.9')
+ IPv4Address('192.0.2.10')
+ IPv4Address('192.0.2.11')
+ IPv4Address('192.0.2.12')
+ IPv4Address('192.0.2.13')
+ IPv4Address('192.0.2.14')
+ IPv4Address('192.0.2.15')
+
+
+Networks as containers of addresses
+"""""""""""""""""""""""""""""""""""
+
+Network objects can act as containers of addresses. Some examples::
+
+ >>> IPv4Network('192.0.2.0/28')[0]
+ IPv4Address('192.0.2.0')
+ >>> IPv4Network('192.0.2.0/28')[15]
+ IPv4Address('192.0.2.15')
+ >>> IPv4Address('192.0.2.6') in IPv4Network('192.0.2.0/28')
+ True
+ >>> IPv4Address('192.0.3.6') in IPv4Network('192.0.2.0/28')
+ False
+
+
+Interface objects
+-----------------
+
+.. class:: IPv4Interface(address)
+
+ Construct an IPv4 interface. The meaning of *address* is as in the
+ constructor of :class:`IPv4Network`, except that arbitrary host addresses
+ are always accepted.
+
+ :class:`IPv4Interface` is a subclass of :class:`IPv4Address`, so it inherits
+ all the attributes from that class. In addition, the following attributes
+ are available:
+
+ .. attribute:: ip
+
+ The address (:class:`IPv4Address`) without network information.
+
+ >>> interface = IPv4Interface('192.0.2.5/24')
+ >>> interface.ip
+ IPv4Address('192.0.2.5')
+
+ .. attribute:: network
+
+ The network (:class:`IPv4Network`) this interface belongs to.
+
+ >>> interface = IPv4Interface('192.0.2.5/24')
+ >>> interface.network
+ IPv4Network('192.0.2.0/24')
+
+ .. attribute:: with_prefixlen
+
+ A string representation of the interface with the mask in prefix notation.
+
+ >>> interface = IPv4Interface('192.0.2.5/24')
+ >>> interface.with_prefixlen
+ '192.0.2.5/24'
+
+ .. attribute:: with_netmask
+
+ A string representation of the interface with the network as a net mask.
+
+ >>> interface = IPv4Interface('192.0.2.5/24')
+ >>> interface.with_netmask
+ '192.0.2.5/255.255.255.0'
+
+ .. attribute:: with_hostmask
+
+ A string representation of the interface with the network as a host mask.
+
+ >>> interface = IPv4Interface('192.0.2.5/24')
+ >>> interface.with_hostmask
+ '192.0.2.5/0.0.0.255'
+
+
+.. class:: IPv6Interface(address)
+
+ Construct an IPv6 interface. The meaning of *address* is as in the
+ constructor of :class:`IPv6Network`, except that arbitrary host addresses
+ are always accepted.
+
+ :class:`IPv6Interface` is a subclass of :class:`IPv6Address`, so it inherits
+ all the attributes from that class. In addition, the following attributes
+ are available:
+
+ .. attribute:: ip
+ .. attribute:: network
+ .. attribute:: with_prefixlen
+ .. attribute:: with_netmask
+ .. attribute:: with_hostmask
+
+ Refer to the corresponding attribute documentation in
+ :class:`IPv4Interface`.
+
+
+Other Module Level Functions
+----------------------------
+
+The module also provides the following module level functions:
+
+.. function:: v4_int_to_packed(address)
+
+ Represent an address as 4 packed bytes in network (big-endian) order.
+ *address* is an integer representation of an IPv4 IP address. A
+ :exc:`ValueError` is raised if the integer is negative or too large to be an
+ IPv4 IP address.
+
+ >>> ipaddress.ip_address(3221225985)
+ IPv4Address('192.0.2.1')
+ >>> ipaddress.v4_int_to_packed(3221225985)
+ b'\xc0\x00\x02\x01'
+
+
+.. function:: v6_int_to_packed(address)
+
+ Represent an address as 16 packed bytes in network (big-endian) order.
+ *address* is an integer representation of an IPv6 IP address. A
+ :exc:`ValueError` is raised if the integer is negative or too large to be an
+ IPv6 IP address.
+
+
+.. function:: summarize_address_range(first, last)
+
+ Return an iterator of the summarized network range given the first and last
+ IP addresses. *first* is the first :class:`IPv4Address` or
+ :class:`IPv6Address` in the range and *last* is the last :class:`IPv4Address`
+ or :class:`IPv6Address` in the range. A :exc:`TypeError` is raised if
+ *first* or *last* are not IP addresses or are not of the same version. A
+ :exc:`ValueError` is raised if *last* is not greater than *first* or if
+ *first* address version is not 4 or 6.
+
+ >>> [ipaddr for ipaddr in ipaddress.summarize_address_range(
+ ... ipaddress.IPv4Address('192.0.2.0'),
+ ... ipaddress.IPv4Address('192.0.2.130'))]
+ [IPv4Network('192.0.2.0/25'), IPv4Network('192.0.2.128/31'), IPv4Network('192.0.2.130/32')]
+
+
+.. function:: collapse_addresses(addresses)
+
+ Return an iterator of the collapsed :class:`IPv4Network` or
+ :class:`IPv6Network` objects. *addresses* is an iterator of
+ :class:`IPv4Network` or :class:`IPv6Network` objects. A :exc:`TypeError` is
+ raised if *addresses* contains mixed version objects.
+
+ >>> [ipaddr for ipaddr in
+ ... ipaddress.collapse_addresses([ipaddress.IPv4Network('192.0.2.0/25'),
+ ... ipaddress.IPv4Network('192.0.2.128/25')])]
+ [IPv4Network('192.0.2.0/24')]
+
+
+.. function:: get_mixed_type_key(obj)
+
+ Return a key suitable for sorting between networks and addresses. Address
+ and Network objects are not sortable by default; they're fundamentally
+ different, so the expression::
+
+ IPv4Address('192.0.2.0') <= IPv4Network('192.0.2.0/24')
+
+ doesn't make sense. There are some times however, where you may wish to
+ have :mod:`ipaddress` sort these anyway. If you need to do this, you can use
+ this function as the ``key`` argument to :func:`sorted()`.
+
+ *obj* is either a network or address object.
+
+
+Custom Exceptions
+-----------------
+
+To support more specific error reporting from class constructors, the
+module defines the following exceptions:
+
+.. exception:: AddressValueError(ValueError)
+
+ Any value error related to the address.
+
+
+.. exception:: NetmaskValueError(ValueError)
+
+ Any value error related to the netmask.
diff --git a/Doc/library/ipc.rst b/Doc/library/ipc.rst
index c873065..91ec693 100644
--- a/Doc/library/ipc.rst
+++ b/Doc/library/ipc.rst
@@ -8,7 +8,7 @@ The modules described in this chapter provide mechanisms for different processes
to communicate.
Some modules only work for two processes that are on the same machine, e.g.
-:mod:`signal` and :mod:`subprocess`. Other modules support networking protocols
+:mod:`signal` and :mod:`mmap`. Other modules support networking protocols
that two or more processes can used to communicate across machines.
The list of modules described in this chapter is:
@@ -16,9 +16,9 @@ The list of modules described in this chapter is:
.. toctree::
- subprocess.rst
socket.rst
ssl.rst
- signal.rst
asyncore.rst
asynchat.rst
+ signal.rst
+ mmap.rst
diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst
index 308f925..1eb554a 100644
--- a/Doc/library/itertools.rst
+++ b/Doc/library/itertools.rst
@@ -46,7 +46,7 @@ Iterator Arguments Results
==================== ============================ ================================================= =============================================================
Iterator Arguments Results Example
==================== ============================ ================================================= =============================================================
-:func:`accumulate` p p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15``
+:func:`accumulate` p [,func] p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15``
:func:`chain` p, q, ... p0, p1, ... plast, q0, q1, ... ``chain('ABC', 'DEF') --> A B C D E F``
:func:`compress` data, selectors (d[0] if s[0]), (d[1] if s[1]), ... ``compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F``
:func:`dropwhile` pred, seq seq[n], seq[n+1], starting when pred fails ``dropwhile(lambda x: x<5, [1,4,6,4,1]) --> 6 4 1``
@@ -84,23 +84,61 @@ The following module functions all construct and return iterators. Some provide
streams of infinite length, so they should only be accessed by functions or
loops that truncate the stream.
-.. function:: accumulate(iterable)
+.. function:: accumulate(iterable[, func])
Make an iterator that returns accumulated sums. Elements may be any addable
- type including :class:`Decimal` or :class:`Fraction`. Equivalent to::
+ type including :class:`Decimal` or :class:`Fraction`. If the optional
+ *func* argument is supplied, it should be a function of two arguments
+ and it will be used instead of addition.
- def accumulate(iterable):
+ Equivalent to::
+
+ def accumulate(iterable, func=operator.add):
'Return running totals'
# accumulate([1,2,3,4,5]) --> 1 3 6 10 15
+ # accumulate([1,2,3,4,5], operator.mul) --> 1 2 6 24 120
it = iter(iterable)
total = next(it)
yield total
for element in it:
- total = total + element
+ total = func(total, element)
yield total
+ There are a number of uses for the *func* argument. It can be set to
+ :func:`min` for a running minimum, :func:`max` for a running maximum, or
+ :func:`operator.mul` for a running product. Amortization tables can be
+ built by accumulating interest and applying payments. First-order
+ `recurrence relations <http://en.wikipedia.org/wiki/Recurrence_relation>`_
+ can be modeled by supplying the initial value in the iterable and using only
+ the accumulated total in *func* argument::
+
+ >>> data = [3, 4, 6, 2, 1, 9, 0, 7, 5, 8]
+ >>> list(accumulate(data, operator.mul)) # running product
+ [3, 12, 72, 144, 144, 1296, 0, 0, 0, 0]
+ >>> list(accumulate(data, max)) # running maximum
+ [3, 4, 6, 6, 6, 9, 9, 9, 9, 9]
+
+ # Amortize a 5% loan of 1000 with 4 annual payments of 90
+ >>> cashflows = [1000, -90, -90, -90, -90]
+ >>> list(accumulate(cashflows, lambda bal, pmt: bal*1.05 + pmt))
+ [1000, 960.0, 918.0, 873.9000000000001, 827.5950000000001]
+
+ # Chaotic recurrence relation http://en.wikipedia.org/wiki/Logistic_map
+ >>> logistic_map = lambda x, _: r * x * (1 - x)
+ >>> r = 3.8
+ >>> x0 = 0.4
+ >>> inputs = repeat(x0, 36) # only the initial value is used
+ >>> [format(x, '.2f') for x in accumulate(inputs, logistic_map)]
+ ['0.40', '0.91', '0.30', '0.81', '0.60', '0.92', '0.29', '0.79', '0.63',
+ '0.88', '0.39', '0.90', '0.33', '0.84', '0.52', '0.95', '0.18', '0.57',
+ '0.93', '0.25', '0.71', '0.79', '0.63', '0.88', '0.39', '0.91', '0.32',
+ '0.83', '0.54', '0.95', '0.20', '0.60', '0.91', '0.30', '0.80', '0.60']
+
.. versionadded:: 3.2
+ .. versionchanged:: 3.3
+ Added the optional *func* parameter.
+
.. function:: chain(*iterables)
Make an iterator that returns elements from the first iterable until it is
@@ -668,7 +706,8 @@ which incur interpreter overhead.
return zip(a, b)
def grouper(n, iterable, fillvalue=None):
- "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
+ "Collect data into fixed-length chunks or blocks"
+ # grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
args = [iter(iterable)] * n
return zip_longest(*args, fillvalue=fillvalue)
diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst
index 45aba0a..d46f1cf 100644
--- a/Doc/library/locale.rst
+++ b/Doc/library/locale.rst
@@ -475,8 +475,11 @@ in such a way that frequent locale changes may cause core dumps. This makes the
locale somewhat painful to use correctly.
Initially, when a program is started, the locale is the ``C`` locale, no matter
-what the user's preferred locale is. The program must explicitly say that it
-wants the user's preferred locale settings by calling ``setlocale(LC_ALL, '')``.
+what the user's preferred locale is. There is one exception: the
+:data:`LC_CTYPE` category is changed at startup to set the current locale
+encoding to the user's preferred locale encoding. The program must explicitly
+say that it wants the user's preferred locale settings for other categories by
+calling ``setlocale(LC_ALL, '')``.
It is generally a bad idea to call :func:`setlocale` in some library routine,
since as a side effect it affects the entire program. Saving and restoring it
diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst
index ef65cfa..537b8c7 100644
--- a/Doc/library/logging.handlers.rst
+++ b/Doc/library/logging.handlers.rst
@@ -164,6 +164,87 @@ this value.
changed. If it has, the existing stream is flushed and closed and the
file opened again, before outputting the record to the file.
+.. _base-rotating-handler:
+
+BaseRotatingHandler
+^^^^^^^^^^^^^^^^^^^
+
+The :class:`BaseRotatingHandler` class, located in the :mod:`logging.handlers`
+module, is the base class for the rotating file handlers,
+:class:`RotatingFileHandler` and :class:`TimedRotatingFileHandler`. You should
+not need to instantiate this class, but it has attributes and methods you may
+need to override.
+
+.. class:: BaseRotatingHandler(filename, mode, encoding=None, delay=False)
+
+ The parameters are as for :class:`FileHandler`. The attributes are:
+
+ .. attribute:: namer
+
+ If this attribute is set to a callable, the :meth:`rotation_filename`
+ method delegates to this callable. The parameters passed to the callable
+ are those passed to :meth:`rotation_filename`.
+
+ .. note:: The namer function is called quite a few times during rollover,
+ so it should be as simple and as fast as possible. It should also
+ return the same output every time for a given input, otherwise the
+ rollover behaviour may not work as expected.
+
+ .. versionadded:: 3.3
+
+
+ .. attribute:: BaseRotatingHandler.rotator
+
+ If this attribute is set to a callable, the :meth:`rotate` method
+ delegates to this callable. The parameters passed to the callable are
+ those passed to :meth:`rotate`.
+
+ .. versionadded:: 3.3
+
+ .. method:: BaseRotatingHandler.rotation_filename(default_name)
+
+ Modify the filename of a log file when rotating.
+
+ This is provided so that a custom filename can be provided.
+
+ The default implementation calls the 'namer' attribute of the handler,
+ if it's callable, passing the default name to it. If the attribute isn't
+ callable (the default is ``None``), the name is returned unchanged.
+
+ :param default_name: The default name for the log file.
+
+ .. versionadded:: 3.3
+
+
+ .. method:: BaseRotatingHandler.rotate(source, dest)
+
+ When rotating, rotate the current log.
+
+ The default implementation calls the 'rotator' attribute of the handler,
+ if it's callable, passing the source and dest arguments to it. If the
+ attribute isn't callable (the default is ``None``), the source is simply
+ renamed to the destination.
+
+ :param source: The source filename. This is normally the base
+ filename, e.g. 'test.log'
+ :param dest: The destination filename. This is normally
+ what the source is rotated to, e.g. 'test.log.1'.
+
+ .. versionadded:: 3.3
+
+The reason the attributes exist is to save you having to subclass - you can use
+the same callables for instances of :class:`RotatingFileHandler` and
+:class:`TimedRotatingFileHandler`. If either the namer or rotator callable
+raises an exception, this will be handled in the same way as any other
+exception during an :meth:`emit` call, i.e. via the :meth:`handleError` method
+of the handler.
+
+If you need to make more significant changes to rotation processing, you can
+override the methods.
+
+For an example, see :ref:`cookbook-rotator-namer`.
+
+
.. _rotating-file-handler:
RotatingFileHandler
@@ -452,6 +533,15 @@ supports sending logging messages to a remote or local Unix syslog.
behaviour) but can be set to ``False`` on a ``SysLogHandler`` instance
in order for that instance to *not* append the NUL terminator.
+ .. versionchanged:: 3.3
+ (See: :issue:`12419`.) In earlier versions, there was no facility for
+ an "ident" or "tag" prefix to identify the source of the message. This
+ can now be specified using a class-level attribute, defaulting to
+ ``""`` to preserve existing behaviour, but which can be overridden on
+ a ``SysLogHandler`` instance in order for that instance to prepend
+ the ident to every message handled. Note that the provided ident must
+ be text, not bytes, and is prepended to the message exactly as is.
+
.. method:: encodePriority(facility, priority)
Encodes the facility and priority into an integer. You can pass in strings
@@ -614,7 +704,7 @@ The :class:`SMTPHandler` class, located in the :mod:`logging.handlers` module,
supports sending logging messages to an email address via SMTP.
-.. class:: SMTPHandler(mailhost, fromaddr, toaddrs, subject, credentials=None, secure=None)
+.. class:: SMTPHandler(mailhost, fromaddr, toaddrs, subject, credentials=None, secure=None, timeout=1.0)
Returns a new instance of the :class:`SMTPHandler` class. The instance is
initialized with the from and to addresses and subject line of the email. The
@@ -630,6 +720,12 @@ supports sending logging messages to an email address via SMTP.
and certificate file. (This tuple is passed to the
:meth:`smtplib.SMTP.starttls` method.)
+ A timeout can be specified for communication with the SMTP server using the
+ *timeout* argument.
+
+ .. versionadded:: 3.3
+ The *timeout* argument was added.
+
.. method:: emit(record)
Formats the record and sends it to the specified addressees.
@@ -690,7 +786,7 @@ should, then :meth:`flush` is expected to do the flushing.
.. method:: close()
- Calls :meth:`flush`, sets the target to :const:`None` and clears the
+ Calls :meth:`flush`, sets the target to ``None`` and clears the
buffer.
@@ -859,6 +955,15 @@ possible, while any potentially slow operations (such as sending an email via
Note that if you don't call this before your application exits, there
may be some records still left on the queue, which won't be processed.
+ .. method:: enqueue_sentinel()
+
+ Writes a sentinel to the queue to tell the listener to quit. This
+ implementation uses ``put_nowait()``. You may want to override this
+ method if you want to use timeouts or work with custom queue
+ implementations.
+
+ .. versionadded:: 3.3
+
.. seealso::
diff --git a/Doc/library/logging.rst b/Doc/library/logging.rst
index b6622e3..0d956b0 100644
--- a/Doc/library/logging.rst
+++ b/Doc/library/logging.rst
@@ -213,6 +213,9 @@ is the module's name in the Python package namespace.
Logs a message with level :const:`WARNING` on this logger. The arguments are
interpreted as for :meth:`debug`.
+ .. note:: There is an obsolete method ``warn`` which is functionally
+ identical to ``warning``. As ``warn`` is deprecated, please do not use
+ it - use ``warning`` instead.
.. method:: Logger.error(msg, *args, **kwargs)
@@ -492,6 +495,19 @@ The useful mapping keys in a :class:`LogRecord` are given in the section on
want all logging times to be shown in GMT, set the ``converter``
attribute in the ``Formatter`` class.
+ .. versionchanged:: 3.3
+ Previously, the default ISO 8601 format was hard-coded as in this
+ example: ``2010-09-06 22:38:15,292`` where the part before the comma is
+ handled by a strptime format string (``'%Y-%m-%d %H:%M:%S'``), and the
+ part after the comma is a millisecond value. Because strptime does not
+ have a format placeholder for milliseconds, the millisecond value is
+ appended using another format string, ``'%s,%03d'`` – and both of these
+ format strings have been hardcoded into this method. With the change,
+ these strings are defined as class-level attributes which can be
+ overridden at the instance level when desired. The names of the
+ attributes are ``default_time_format`` (for the strptime format string)
+ and ``default_msec_format`` (for appending the millisecond value).
+
.. method:: formatException(exc_info)
Formats the specified exception information (a standard exception tuple as
@@ -901,8 +917,12 @@ functions.
.. function:: warning(msg, *args, **kwargs)
- Logs a message with level :const:`WARNING` on the root logger. The arguments are
- interpreted as for :func:`debug`.
+ Logs a message with level :const:`WARNING` on the root logger. The arguments
+ are interpreted as for :func:`debug`.
+
+ .. note:: There is an obsolete function ``warn`` which is functionally
+ identical to ``warning``. As ``warn`` is deprecated, please do not use
+ it - use ``warning`` instead.
.. function:: error(msg, *args, **kwargs)
@@ -1028,12 +1048,27 @@ functions.
| ``stream`` | Use the specified stream to initialize the |
| | StreamHandler. Note that this argument is |
| | incompatible with 'filename' - if both are |
- | | present, 'stream' is ignored. |
+ | | present, a ``ValueError`` is raised. |
+ +--------------+---------------------------------------------+
+ | ``handlers`` | If specified, this should be an iterable of |
+ | | already created handlers to add to the root |
+ | | logger. Any handlers which don't already |
+ | | have a formatter set will be assigned the |
+ | | default formatter created in this function. |
+ | | Note that this argument is incompatible |
+ | | with 'filename' or 'stream' - if both are |
+ | | present, a ``ValueError`` is raised. |
+--------------+---------------------------------------------+
.. versionchanged:: 3.2
The ``style`` argument was added.
+ .. versionchanged:: 3.3
+ The ``handlers`` argument was added. Additional checks were added to
+ catch situations where incompatible arguments are specified (e.g.
+ ``handlers`` together with ``stream`` or ``filename``, or ``stream``
+ together with ``filename``).
+
.. function:: shutdown()
diff --git a/Doc/library/lzma.rst b/Doc/library/lzma.rst
new file mode 100644
index 0000000..f09fa08
--- /dev/null
+++ b/Doc/library/lzma.rst
@@ -0,0 +1,382 @@
+:mod:`lzma` --- Compression using the LZMA algorithm
+====================================================
+
+.. module:: lzma
+ :synopsis: A Python wrapper for the liblzma compression library.
+.. moduleauthor:: Nadeem Vawda <nadeem.vawda@gmail.com>
+.. sectionauthor:: Nadeem Vawda <nadeem.vawda@gmail.com>
+
+.. versionadded:: 3.3
+
+
+This module provides classes and convenience functions for compressing and
+decompressing data using the LZMA compression algorithm. Also included is a file
+interface supporting the ``.xz`` and legacy ``.lzma`` file formats used by the
+:program:`xz` utility, as well as raw compressed streams.
+
+The interface provided by this module is very similar to that of the :mod:`bz2`
+module. However, note that :class:`LZMAFile` is *not* thread-safe, unlike
+:class:`bz2.BZ2File`, so if you need to use a single :class:`LZMAFile` instance
+from multiple threads, it is necessary to protect it with a lock.
+
+
+.. exception:: LZMAError
+
+ This exception is raised when an error occurs during compression or
+ decompression, or while initializing the compressor/decompressor state.
+
+
+Reading and writing compressed files
+------------------------------------
+
+.. function:: open(filename, mode="rb", \*, format=None, check=-1, preset=None, filters=None, encoding=None, errors=None, newline=None)
+
+ Open an LZMA-compressed file in binary or text mode, returning a :term:`file
+ object`.
+
+ The *filename* argument can be either an actual file name (given as a
+ :class:`str` or :class:`bytes` object), in which case the named file is
+ opened, or it can be an existing file object to read from or write to.
+
+ The *mode* argument can be any of ``"r"``, ``"rb"``, ``"w"``, ``"wb"``,
+ ``"a"`` or ``"ab"`` for binary mode, or ``"rt"``, ``"wt"``, or ``"at"`` for
+ text mode. The default is ``"rb"``.
+
+ When opening a file for reading, the *format* and *filters* arguments have
+ the same meanings as for :class:`LZMADecompressor`. In this case, the *check*
+ and *preset* arguments should not be used.
+
+ When opening a file for writing, the *format*, *check*, *preset* and
+ *filters* arguments have the same meanings as for :class:`LZMACompressor`.
+
+ For binary mode, this function is equivalent to the :class:`LZMAFile`
+ constructor: ``LZMAFile(filename, mode, ...)``. In this case, the *encoding*,
+ *errors* and *newline* arguments must not be provided.
+
+ For text mode, a :class:`LZMAFile` object is created, and wrapped in an
+ :class:`io.TextIOWrapper` instance with the specified encoding, error
+ handling behavior, and line ending(s).
+
+
+.. class:: LZMAFile(filename=None, mode="r", \*, format=None, check=-1, preset=None, filters=None)
+
+ Open an LZMA-compressed file in binary mode.
+
+ An :class:`LZMAFile` can wrap an already-open :term:`file object`, or operate
+ directly on a named file. The *filename* argument specifies either the file
+ object to wrap, or the name of the file to open (as a :class:`str` or
+ :class:`bytes` object). When wrapping an existing file object, the wrapped
+ file will not be closed when the :class:`LZMAFile` is closed.
+
+ The *mode* argument can be either ``"r"`` for reading (default), ``"w"`` for
+ overwriting, or ``"a"`` for appending. These can equivalently be given as
+ ``"rb"``, ``"wb"``, and ``"ab"`` respectively.
+
+ If *filename* is a file object (rather than an actual file name), a mode of
+ ``"w"`` does not truncate the file, and is instead equivalent to ``"a"``.
+
+ When opening a file for reading, the input file may be the concatenation of
+ multiple separate compressed streams. These are transparently decoded as a
+ single logical stream.
+
+ When opening a file for reading, the *format* and *filters* arguments have
+ the same meanings as for :class:`LZMADecompressor`. In this case, the *check*
+ and *preset* arguments should not be used.
+
+ When opening a file for writing, the *format*, *check*, *preset* and
+ *filters* arguments have the same meanings as for :class:`LZMACompressor`.
+
+ :class:`LZMAFile` supports all the members specified by
+ :class:`io.BufferedIOBase`, except for :meth:`detach` and :meth:`truncate`.
+ Iteration and the :keyword:`with` statement are supported.
+
+ The following method is also provided:
+
+ .. method:: peek(size=-1)
+
+ Return buffered data without advancing the file position. At least one
+ byte of data will be returned, unless EOF has been reached. The exact
+ number of bytes returned is unspecified (the *size* argument is ignored).
+
+
+Compressing and decompressing data in memory
+--------------------------------------------
+
+.. class:: LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)
+
+ Create a compressor object, which can be used to compress data incrementally.
+
+ For a more convenient way of compressing a single chunk of data, see
+ :func:`compress`.
+
+ The *format* argument specifies what container format should be used.
+ Possible values are:
+
+ * :const:`FORMAT_XZ`: The ``.xz`` container format.
+ This is the default format.
+
+ * :const:`FORMAT_ALONE`: The legacy ``.lzma`` container format.
+ This format is more limited than ``.xz`` -- it does not support integrity
+ checks or multiple filters.
+
+ * :const:`FORMAT_RAW`: A raw data stream, not using any container format.
+ This format specifier does not support integrity checks, and requires that
+ you always specify a custom filter chain (for both compression and
+ decompression). Additionally, data compressed in this manner cannot be
+ decompressed using :const:`FORMAT_AUTO` (see :class:`LZMADecompressor`).
+
+ The *check* argument specifies the type of integrity check to include in the
+ compressed data. This check is used when decompressing, to ensure that the
+ data has not been corrupted. Possible values are:
+
+ * :const:`CHECK_NONE`: No integrity check.
+ This is the default (and the only acceptable value) for
+ :const:`FORMAT_ALONE` and :const:`FORMAT_RAW`.
+
+ * :const:`CHECK_CRC32`: 32-bit Cyclic Redundancy Check.
+
+ * :const:`CHECK_CRC64`: 64-bit Cyclic Redundancy Check.
+ This is the default for :const:`FORMAT_XZ`.
+
+ * :const:`CHECK_SHA256`: 256-bit Secure Hash Algorithm.
+
+ If the specified check is not supported, an :class:`LZMAError` is raised.
+
+ The compression settings can be specified either as a preset compression
+ level (with the *preset* argument), or in detail as a custom filter chain
+ (with the *filters* argument).
+
+ The *preset* argument (if provided) should be an integer between ``0`` and
+ ``9`` (inclusive), optionally OR-ed with the constant
+ :const:`PRESET_EXTREME`. If neither *preset* nor *filters* are given, the
+ default behavior is to use :const:`PRESET_DEFAULT` (preset level ``6``).
+ Higher presets produce smaller output, but make the compression process
+ slower.
+
+ .. note::
+
+ In addition to being more CPU-intensive, compression with higher presets
+ also requires much more memory (and produces output that needs more memory
+ to decompress). With preset ``9`` for example, the overhead for an
+ :class:`LZMACompressor` object can be as high as 800MiB. For this reason,
+ it is generally best to stick with the default preset.
+
+ The *filters* argument (if provided) should be a filter chain specifier.
+ See :ref:`filter-chain-specs` for details.
+
+ .. method:: compress(data)
+
+ Compress *data* (a :class:`bytes` object), returning a :class:`bytes`
+ object containing compressed data for at least part of the input. Some of
+ *data* may be buffered internally, for use in later calls to
+ :meth:`compress` and :meth:`flush`. The returned data should be
+ concatenated with the output of any previous calls to :meth:`compress`.
+
+ .. method:: flush()
+
+ Finish the compression process, returning a :class:`bytes` object
+ containing any data stored in the compressor's internal buffers.
+
+ The compressor cannot be used after this method has been called.
+
+
+.. class:: LZMADecompressor(format=FORMAT_AUTO, memlimit=None, filters=None)
+
+ Create a decompressor object, which can be used to decompress data
+ incrementally.
+
+ For a more convenient way of decompressing an entire compressed stream at
+ once, see :func:`decompress`.
+
+ The *format* argument specifies the container format that should be used. The
+ default is :const:`FORMAT_AUTO`, which can decompress both ``.xz`` and
+ ``.lzma`` files. Other possible values are :const:`FORMAT_XZ`,
+ :const:`FORMAT_ALONE`, and :const:`FORMAT_RAW`.
+
+ The *memlimit* argument specifies a limit (in bytes) on the amount of memory
+ that the decompressor can use. When this argument is used, decompression will
+ fail with an :class:`LZMAError` if it is not possible to decompress the input
+ within the given memory limit.
+
+ The *filters* argument specifies the filter chain that was used to create
+ the stream being decompressed. This argument is required if *format* is
+ :const:`FORMAT_RAW`, but should not be used for other formats.
+ See :ref:`filter-chain-specs` for more information about filter chains.
+
+ .. note::
+ This class does not transparently handle inputs containing multiple
+ compressed streams, unlike :func:`decompress` and :class:`LZMAFile`. To
+ decompress a multi-stream input with :class:`LZMADecompressor`, you must
+ create a new decompressor for each stream.
+
+ .. method:: decompress(data)
+
+ Decompress *data* (a :class:`bytes` object), returning a :class:`bytes`
+ object containing the decompressed data for at least part of the input.
+ Some of *data* may be buffered internally, for use in later calls to
+ :meth:`decompress`. The returned data should be concatenated with the
+ output of any previous calls to :meth:`decompress`.
+
+ .. attribute:: check
+
+ The ID of the integrity check used by the input stream. This may be
+ :const:`CHECK_UNKNOWN` until enough of the input has been decoded to
+ determine what integrity check it uses.
+
+ .. attribute:: eof
+
+ True if the end-of-stream marker has been reached.
+
+ .. attribute:: unused_data
+
+ Data found after the end of the compressed stream.
+
+ Before the end of the stream is reached, this will be ``b""``.
+
+
+.. function:: compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None)
+
+ Compress *data* (a :class:`bytes` object), returning the compressed data as a
+ :class:`bytes` object.
+
+ See :class:`LZMACompressor` above for a description of the *format*, *check*,
+ *preset* and *filters* arguments.
+
+
+.. function:: decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None)
+
+ Decompress *data* (a :class:`bytes` object), returning the uncompressed data
+ as a :class:`bytes` object.
+
+ If *data* is the concatenation of multiple distinct compressed streams,
+ decompress all of these streams, and return the concatenation of the results.
+
+ See :class:`LZMADecompressor` above for a description of the *format*,
+ *memlimit* and *filters* arguments.
+
+
+Miscellaneous
+-------------
+
+.. function:: is_check_supported(check)
+
+ Returns true if the given integrity check is supported on this system.
+
+ :const:`CHECK_NONE` and :const:`CHECK_CRC32` are always supported.
+ :const:`CHECK_CRC64` and :const:`CHECK_SHA256` may be unavailable if you are
+ using a version of :program:`liblzma` that was compiled with a limited
+ feature set.
+
+
+.. _filter-chain-specs:
+
+Specifying custom filter chains
+-------------------------------
+
+A filter chain specifier is a sequence of dictionaries, where each dictionary
+contains the ID and options for a single filter. Each dictionary must contain
+the key ``"id"``, and may contain additional keys to specify filter-dependent
+options. Valid filter IDs are as follows:
+
+* Compression filters:
+ * :const:`FILTER_LZMA1` (for use with :const:`FORMAT_ALONE`)
+ * :const:`FILTER_LZMA2` (for use with :const:`FORMAT_XZ` and :const:`FORMAT_RAW`)
+
+* Delta filter:
+ * :const:`FILTER_DELTA`
+
+* Branch-Call-Jump (BCJ) filters:
+ * :const:`FILTER_X86`
+ * :const:`FILTER_IA64`
+ * :const:`FILTER_ARM`
+ * :const:`FILTER_ARMTHUMB`
+ * :const:`FILTER_POWERPC`
+ * :const:`FILTER_SPARC`
+
+A filter chain can consist of up to 4 filters, and cannot be empty. The last
+filter in the chain must be a compression filter, and any other filters must be
+delta or BCJ filters.
+
+Compression filters support the following options (specified as additional
+entries in the dictionary representing the filter):
+
+ * ``preset``: A compression preset to use as a source of default values for
+ options that are not specified explicitly.
+ * ``dict_size``: Dictionary size in bytes. This should be between 4KiB and
+ 1.5GiB (inclusive).
+ * ``lc``: Number of literal context bits.
+ * ``lp``: Number of literal position bits. The sum ``lc + lp`` must be at
+ most 4.
+ * ``pb``: Number of position bits; must be at most 4.
+ * ``mode``: :const:`MODE_FAST` or :const:`MODE_NORMAL`.
+ * ``nice_len``: What should be considered a "nice length" for a match.
+ This should be 273 or less.
+ * ``mf``: What match finder to use -- :const:`MF_HC3`, :const:`MF_HC4`,
+ :const:`MF_BT2`, :const:`MF_BT3`, or :const:`MF_BT4`.
+ * ``depth``: Maximum search depth used by match finder. 0 (default) means to
+ select automatically based on other filter options.
+
+The delta filter stores the differences between bytes, producing more repetitive
+input for the compressor in certain circumstances. It supports only one
+option, ``dist``. This indicates the distance
+between bytes to be subtracted. The default is 1, i.e. take the differences
+between adjacent bytes.
+
+The BCJ filters are intended to be applied to machine code. They convert
+relative branches, calls and jumps in the code to use absolute addressing, with
+the aim of increasing the redundancy that can be exploited by the compressor.
+These filters support one option, ``start_offset``. This specifies the address
+that should be mapped to the beginning of the input data. The default is 0.
+
+
+Examples
+--------
+
+Reading in a compressed file::
+
+ import lzma
+ with lzma.open("file.xz") as f:
+ file_content = f.read()
+
+Creating a compressed file::
+
+ import lzma
+ data = b"Insert Data Here"
+ with lzma.open("file.xz", "w") as f:
+ f.write(data)
+
+Compressing data in memory::
+
+ import lzma
+ data_in = b"Insert Data Here"
+ data_out = lzma.compress(data_in)
+
+Incremental compression::
+
+ import lzma
+ lzc = lzma.LZMACompressor()
+ out1 = lzc.compress(b"Some data\n")
+ out2 = lzc.compress(b"Another piece of data\n")
+ out3 = lzc.compress(b"Even more data\n")
+ out4 = lzc.flush()
+ # Concatenate all the partial results:
+ result = b"".join([out1, out2, out3, out4])
+
+Writing compressed data to an already-open file::
+
+ import lzma
+ with open("file.xz", "wb") as f:
+ f.write(b"This data will not be compressed\n")
+ with lzma.open(f, "w") as lzf:
+ lzf.write(b"This *will* be compressed\n")
+ f.write(b"Not compressed\n")
+
+Creating a compressed file using a custom filter chain::
+
+ import lzma
+ my_filters = [
+ {"id": lzma.FILTER_DELTA, "dist": 5},
+ {"id": lzma.FILTER_LZMA2, "preset": 7 | lzma.PRESET_EXTREME},
+ ]
+ with lzma.open("file.xz", "w", filters=my_filters) as f:
+ f.write(b"blah blah blah")
diff --git a/Doc/library/markup.rst b/Doc/library/markup.rst
index 1b4cca5..1588aa8 100644
--- a/Doc/library/markup.rst
+++ b/Doc/library/markup.rst
@@ -9,20 +9,13 @@ data markup. This includes modules to work with the Standard Generalized Markup
Language (SGML) and the Hypertext Markup Language (HTML), and several interfaces
for working with the Extensible Markup Language (XML).
-It is important to note that modules in the :mod:`xml` package require that
-there be at least one SAX-compliant XML parser available. The Expat parser is
-included with Python, so the :mod:`xml.parsers.expat` module will always be
-available.
-
-The documentation for the :mod:`xml.dom` and :mod:`xml.sax` packages are the
-definition of the Python bindings for the DOM and SAX interfaces.
-
.. toctree::
html.rst
html.parser.rst
html.entities.rst
+ xml.rst
xml.etree.elementtree.rst
xml.dom.rst
xml.dom.minidom.rst
diff --git a/Doc/library/math.rst b/Doc/library/math.rst
index 98c5b33..62c0f34 100644
--- a/Doc/library/math.rst
+++ b/Doc/library/math.rst
@@ -184,6 +184,19 @@ Power and logarithmic functions
result is calculated in a way which is accurate for *x* near zero.
+.. function:: log2(x)
+
+ Return the base-2 logarithm of *x*. This is usually more accurate than
+ ``log(x, 2)``.
+
+ .. versionadded:: 3.3
+
+ .. seealso::
+
+ :meth:`int.bit_length` returns the number of bits necessary to represent
+ an integer in binary, excluding the sign and leading zeros.
+
+
.. function:: log10(x)
Return the base-10 logarithm of *x*. This is usually more accurate
diff --git a/Doc/library/mmap.rst b/Doc/library/mmap.rst
index 5f0f004..1a19a7e 100644
--- a/Doc/library/mmap.rst
+++ b/Doc/library/mmap.rst
@@ -196,12 +196,16 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
move will raise a :exc:`TypeError` exception.
- .. method:: read(num)
+ .. method:: read([n])
- Return a :class:`bytes` containing up to *num* bytes starting from the
- current file position; the file position is updated to point after the
- bytes that were returned.
+ Return a :class:`bytes` containing up to *n* bytes starting from the
+ current file position. If the argument is omitted, *None* or negative,
+ return all bytes from the current file position to the end of the
+ mapping. The file position is updated to point after the bytes that were
+ returned.
+ .. versionchanged:: 3.3
+ Argument can be omitted or *None*.
.. method:: read_byte()
diff --git a/Doc/library/msvcrt.rst b/Doc/library/msvcrt.rst
index 889a0c5..9d23720 100644
--- a/Doc/library/msvcrt.rst
+++ b/Doc/library/msvcrt.rst
@@ -20,6 +20,11 @@ api. The normal API deals only with ASCII characters and is of limited use
for internationalized applications. The wide char API should be used where
ever possible
+.. versionchanged:: 3.3
+ Operations in this module now raise :exc:`OSError` where :exc:`IOError`
+ was raised.
+
+
.. _msvcrt-files:
File Operations
@@ -29,7 +34,7 @@ File Operations
.. function:: locking(fd, mode, nbytes)
Lock part of a file based on file descriptor *fd* from the C runtime. Raises
- :exc:`IOError` on failure. The locked region of the file extends from the
+ :exc:`OSError` on failure. The locked region of the file extends from the
current file position for *nbytes* bytes, and may continue beyond the end of the
file. *mode* must be one of the :const:`LK_\*` constants listed below. Multiple
regions in a file may be locked at the same time, but may not overlap. Adjacent
@@ -41,13 +46,13 @@ File Operations
Locks the specified bytes. If the bytes cannot be locked, the program
immediately tries again after 1 second. If, after 10 attempts, the bytes cannot
- be locked, :exc:`IOError` is raised.
+ be locked, :exc:`OSError` is raised.
.. data:: LK_NBLCK
LK_NBRLCK
- Locks the specified bytes. If the bytes cannot be locked, :exc:`IOError` is
+ Locks the specified bytes. If the bytes cannot be locked, :exc:`OSError` is
raised.
@@ -73,7 +78,7 @@ File Operations
.. function:: get_osfhandle(fd)
- Return the file handle for the file descriptor *fd*. Raises :exc:`IOError` if
+ Return the file handle for the file descriptor *fd*. Raises :exc:`OSError` if
*fd* is not recognized.
@@ -144,4 +149,4 @@ Other Functions
.. function:: heapmin()
Force the :c:func:`malloc` heap to clean itself up and return unused blocks to
- the operating system. On failure, this raises :exc:`IOError`.
+ the operating system. On failure, this raises :exc:`OSError`.
diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst
index 4271fc2..82c8610 100644
--- a/Doc/library/multiprocessing.rst
+++ b/Doc/library/multiprocessing.rst
@@ -29,7 +29,7 @@ Windows.
Functionality within this package requires that the ``__main__`` module be
importable by the children. This is covered in :ref:`multiprocessing-programming`
however it is worth pointing out here. This means that some examples, such
- as the :class:`multiprocessing.Pool` examples will not work in the
+ as the :class:`multiprocessing.pool.Pool` examples will not work in the
interactive interpreter. For example::
>>> from multiprocessing import Pool
@@ -121,9 +121,7 @@ processes:
print(q.get()) # prints "[42, None, 'hello']"
p.join()
- Queues are thread and process safe, but note that they must never
- be instantiated as a side effect of importing a module: this can lead
- to a deadlock! (see :ref:`threaded-imports`)
+ Queues are thread and process safe.
**Pipes**
@@ -229,11 +227,11 @@ However, if you really do need to use some shared data then
holds Python objects and allows other processes to manipulate them using
proxies.
- A manager returned by :func:`Manager` will support types :class:`list`,
- :class:`dict`, :class:`Namespace`, :class:`Lock`, :class:`RLock`,
- :class:`Semaphore`, :class:`BoundedSemaphore`, :class:`Condition`,
- :class:`Event`, :class:`Queue`, :class:`Value` and :class:`Array`. For
- example, ::
+ A manager returned by :func:`Manager` will support types
+ :class:`list`, :class:`dict`, :class:`Namespace`, :class:`Lock`,
+ :class:`RLock`, :class:`Semaphore`, :class:`BoundedSemaphore`,
+ :class:`Condition`, :class:`Event`, :class:`Barrier`,
+ :class:`Queue`, :class:`Value` and :class:`Array`. For example, ::
from multiprocessing import Process, Manager
@@ -244,17 +242,16 @@ However, if you really do need to use some shared data then
l.reverse()
if __name__ == '__main__':
- manager = Manager()
+ with Manager() as manager:
+ d = manager.dict()
+ l = manager.list(range(10))
- d = manager.dict()
- l = manager.list(range(10))
+ p = Process(target=f, args=(d, l))
+ p.start()
+ p.join()
- p = Process(target=f, args=(d, l))
- p.start()
- p.join()
-
- print(d)
- print(l)
+ print(d)
+ print(l)
will print ::
@@ -282,10 +279,10 @@ For example::
return x*x
if __name__ == '__main__':
- pool = Pool(processes=4) # start 4 worker processes
- result = pool.apply_async(f, [10]) # evaluate "f(10)" asynchronously
- print(result.get(timeout=1)) # prints "100" unless your computer is *very* slow
- print(pool.map(f, range(10))) # prints "[0, 1, 4,..., 81]"
+ with Pool(processes=4) as pool: # start 4 worker processes
+ result = pool.apply_async(f, [10]) # evaluate "f(10)" asynchronously
+ print(result.get(timeout=1)) # prints "100" unless your computer is *very* slow
+ print(pool.map(f, range(10))) # prints "[0, 1, 4,..., 81]"
Reference
@@ -298,7 +295,8 @@ The :mod:`multiprocessing` package mostly replicates the API of the
:class:`Process` and exceptions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. class:: Process(group=None, target=None, name=None, args=(), kwargs={})
+.. class:: Process(group=None, target=None, name=None, args=(), kwargs={}, \
+ *, daemon=None)
Process objects represent activity that is run in a separate process. The
:class:`Process` class has equivalents of all the methods of
@@ -308,18 +306,22 @@ The :mod:`multiprocessing` package mostly replicates the API of the
should always be ``None``; it exists solely for compatibility with
:class:`threading.Thread`. *target* is the callable object to be invoked by
the :meth:`run()` method. It defaults to ``None``, meaning nothing is
- called. *name* is the process name. By default, a unique name is constructed
- of the form 'Process-N\ :sub:`1`:N\ :sub:`2`:...:N\ :sub:`k`' where N\
- :sub:`1`,N\ :sub:`2`,...,N\ :sub:`k` is a sequence of integers whose length
- is determined by the *generation* of the process. *args* is the argument
- tuple for the target invocation. *kwargs* is a dictionary of keyword
- arguments for the target invocation. By default, no arguments are passed to
- *target*.
+ called. *name* is the process name (see :attr:`name` for more details).
+ *args* is the argument tuple for the target invocation. *kwargs* is a
+ dictionary of keyword arguments for the target invocation. If provided,
+ the keyword-only *daemon* argument sets the process :attr:`daemon` flag
+ to ``True`` or ``False``. If ``None`` (the default), this flag will be
+ inherited from the creating process.
+
+ By default, no arguments are passed to *target*.
If a subclass overrides the constructor, it must make sure it invokes the
base class constructor (:meth:`Process.__init__`) before doing anything else
to the process.
+ .. versionchanged:: 3.3
+ Added the *daemon* argument.
+
.. method:: run()
Method representing the process's activity.
@@ -338,10 +340,9 @@ The :mod:`multiprocessing` package mostly replicates the API of the
.. method:: join([timeout])
- Block the calling thread until the process whose :meth:`join` method is
- called terminates or until the optional timeout occurs.
-
- If *timeout* is ``None`` then there is no timeout.
+ If the optional argument *timeout* is ``None`` (the default), the method
+ blocks until the process whose :meth:`join` method is called terminates.
+ If *timeout* is a positive number, it blocks at most *timeout* seconds.
A process can be joined many times.
@@ -350,11 +351,14 @@ The :mod:`multiprocessing` package mostly replicates the API of the
.. attribute:: name
- The process's name.
+ The process's name. The name is a string used for identification purposes
+ only. It has no semantics. Multiple processes may be given the same
+ name.
- The name is a string used for identification purposes only. It has no
- semantics. Multiple processes may be given the same name. The initial
- name is set by the constructor.
+ The initial name is set by the constructor. If no explicit name is
+ provided to the constructor, a name of the form
+ 'Process-N\ :sub:`1`:N\ :sub:`2`:...:N\ :sub:`k`' is constructed, where
+ each N\ :sub:`k` is the N-th child of its parent.
.. method:: is_alive
@@ -406,6 +410,21 @@ The :mod:`multiprocessing` package mostly replicates the API of the
See :ref:`multiprocessing-auth-keys`.
+ .. attribute:: sentinel
+
+ A numeric handle of a system object which will become "ready" when
+ the process ends.
+
+ You can use this value if you want to wait on several events at
+ once using :func:`multiprocessing.connection.wait`. Otherwise
+ calling :meth:`join()` is simpler.
+
+ On Windows, this is an OS handle usable with the ``WaitForSingleObject``
+ and ``WaitForMultipleObjects`` family of API calls. On Unix, this is
+ a file descriptor usable with primitives from the :mod:`select` module.
+
+ .. versionadded:: 3.3
+
.. method:: terminate()
Terminate the process. On Unix this is done using the ``SIGTERM`` signal;
@@ -445,6 +464,9 @@ The :mod:`multiprocessing` package mostly replicates the API of the
>>> p.exitcode == -signal.SIGTERM
True
+.. exception:: ProcessError
+
+ The base class of all :mod:`multiprocessing` exceptions.
.. exception:: BufferTooShort
@@ -454,6 +476,13 @@ The :mod:`multiprocessing` package mostly replicates the API of the
If ``e`` is an instance of :exc:`BufferTooShort` then ``e.args[0]`` will give
the message as a byte string.
+.. exception:: AuthenticationError
+
+ Raised when there is an authentication error.
+
+.. exception:: TimeoutError
+
+ Raised by methods with a timeout when the timeout expires.
Pipes and Queues
~~~~~~~~~~~~~~~~
@@ -465,7 +494,7 @@ primitives like locks.
For passing messages one can use :func:`Pipe` (for a connection between two
processes) or a queue (which allows multiple producers and consumers).
-The :class:`Queue`, :class:`multiprocessing.queues.SimpleQueue` and :class:`JoinableQueue` types are multi-producer,
+The :class:`Queue`, :class:`SimpleQueue` and :class:`JoinableQueue` types are multi-producer,
multi-consumer FIFO queues modelled on the :class:`queue.Queue` class in the
standard library. They differ in that :class:`Queue` lacks the
:meth:`~queue.Queue.task_done` and :meth:`~queue.Queue.join` methods introduced
@@ -611,7 +640,7 @@ For an example of the usage of queues for interprocess communication see
exits -- see :meth:`join_thread`.
-.. class:: multiprocessing.queues.SimpleQueue()
+.. class:: SimpleQueue()
It is a simplified :class:`Queue` type, very close to a locked :class:`Pipe`.
@@ -635,8 +664,8 @@ For an example of the usage of queues for interprocess communication see
.. method:: task_done()
- Indicate that a formerly enqueued task is complete. Used by queue consumer
- threads. For each :meth:`~Queue.get` used to fetch a task, a subsequent
+ Indicate that a formerly enqueued task is complete. Used by queue
+ consumers. For each :meth:`~Queue.get` used to fetch a task, a subsequent
call to :meth:`task_done` tells the queue that the processing on the task
is complete.
@@ -653,7 +682,7 @@ For an example of the usage of queues for interprocess communication see
Block until all items in the queue have been gotten and processed.
The count of unfinished tasks goes up whenever an item is added to the
- queue. The count goes down whenever a consumer thread calls
+ queue. The count goes down whenever a consumer calls
:meth:`task_done` to indicate that the item was retrieved and all work on
it is complete. When the count of unfinished tasks drops to zero,
:meth:`~Queue.join` unblocks.
@@ -767,6 +796,9 @@ Connection objects are usually created using :func:`Pipe` -- see also
*timeout* is a number then this specifies the maximum time in seconds to
block. If *timeout* is ``None`` then an infinite timeout is used.
+ Note that multiple connection objects may be polled at once by
+ using :func:`multiprocessing.connection.wait`.
+
.. method:: send_bytes(buffer[, offset[, size]])
Send byte data from an object supporting the buffer interface as a
@@ -785,9 +817,14 @@ Connection objects are usually created using :func:`Pipe` -- see also
to receive and the other end has closed.
If *maxlength* is specified and the message is longer than *maxlength*
- then :exc:`IOError` is raised and the connection will no longer be
+ then :exc:`OSError` is raised and the connection will no longer be
readable.
+ .. versionchanged:: 3.3
+ This function used to raise an :exc:`IOError`, which is now an
+ alias of :exc:`OSError`.
+
+
.. method:: recv_bytes_into(buffer[, offset])
Read into *buffer* a complete message of byte data sent from the other end
@@ -805,6 +842,14 @@ Connection objects are usually created using :func:`Pipe` -- see also
raised and the complete message is available as ``e.args[0]`` where ``e``
is the exception instance.
+ .. versionchanged:: 3.3
+ Connection objects themselves can now be transferred between processes
+ using :meth:`Connection.send` and :meth:`Connection.recv`.
+
+ .. versionadded:: 3.3
+ Connection objects now support the context manager protocol -- see
+ :ref:`typecontextmanager`. :meth:`__enter__` returns the
+ connection object, and :meth:`__exit__` calls :meth:`close`.
For example:
@@ -856,6 +901,12 @@ program as they are in a multithreaded program. See the documentation for
Note that one can also create synchronization primitives by using a manager
object -- see :ref:`multiprocessing-managers`.
+.. class:: Barrier(parties[, action[, timeout]])
+
+ A barrier object: a clone of :class:`threading.Barrier`.
+
+ .. versionadded:: 3.3
+
.. class:: BoundedSemaphore([value])
A bounded semaphore object: a clone of :class:`threading.BoundedSemaphore`.
@@ -865,20 +916,17 @@ object -- see :ref:`multiprocessing-managers`.
.. class:: Condition([lock])
- A condition variable: a clone of :class:`threading.Condition`.
+ A condition variable: an alias for :class:`threading.Condition`.
If *lock* is specified then it should be a :class:`Lock` or :class:`RLock`
object from :mod:`multiprocessing`.
+ .. versionchanged:: 3.3
+ The :meth:`wait_for` method was added.
+
.. class:: Event()
A clone of :class:`threading.Event`.
- This method returns the state of the internal semaphore on exit, so it
- will always return ``True`` except if a timeout is given and the operation
- times out.
-
- .. versionchanged:: 3.1
- Previously, the method always returned ``None``.
.. class:: Lock()
@@ -894,6 +942,12 @@ object -- see :ref:`multiprocessing-managers`.
.. note::
+ The :meth:`acquire` and :meth:`wait` methods of each of these types
+ treat negative timeouts as zero timeouts. This differs from
+ :mod:`threading` where, since version 3.2, the equivalent
+ :meth:`acquire` methods treat negative timeouts as infinite
+ timeouts.
+
On Mac OS X, ``sem_timedwait`` is unsupported, so calling ``acquire()`` with
a timeout will emulate that function's behavior using a sleeping loop.
@@ -915,10 +969,11 @@ Shared :mod:`ctypes` Objects
It is possible to create shared objects using shared memory which can be
inherited by child processes.
-.. function:: Value(typecode_or_type, *args[, lock])
+.. function:: Value(typecode_or_type, *args, lock=True)
Return a :mod:`ctypes` object allocated from shared memory. By default the
- return value is actually a synchronized wrapper for the object.
+ return value is actually a synchronized wrapper for the object. The object
+ itself can be accessed via the *value* attribute of a :class:`Value`.
*typecode_or_type* determines the type of the returned object: it is either a
ctypes type or a one character typecode of the kind used by the :mod:`array`
@@ -1007,7 +1062,7 @@ processes.
attributes which allow one to use it to store and retrieve strings -- see
documentation for :mod:`ctypes`.
-.. function:: Array(typecode_or_type, size_or_initializer, *args[, lock])
+.. function:: Array(typecode_or_type, size_or_initializer, *, lock=True)
The same as :func:`RawArray` except that depending on the value of *lock* a
process-safe synchronization wrapper may be returned instead of a raw ctypes
@@ -1022,7 +1077,7 @@ processes.
Note that *lock* is a keyword-only argument.
-.. function:: Value(typecode_or_type, *args[, lock])
+.. function:: Value(typecode_or_type, *args, lock=True)
The same as :func:`RawValue` except that depending on the value of *lock* a
process-safe synchronization wrapper may be returned instead of a raw ctypes
@@ -1124,8 +1179,10 @@ Managers
~~~~~~~~
Managers provide a way to create data which can be shared between different
-processes. A manager object controls a server process which manages *shared
-objects*. Other processes can access the shared objects by using proxies.
+processes, including sharing over a network between processes running on
+different machines. A manager object controls a server process which manages
+*shared objects*. Other processes can access the shared objects by using
+proxies.
.. function:: multiprocessing.Manager()
@@ -1198,9 +1255,10 @@ their parent process exits. The manager classes are defined in the
type of shared object. This must be a string.
*callable* is a callable used for creating objects for this type
- identifier. If a manager instance will be created using the
- :meth:`from_address` classmethod or if the *create_method* argument is
- ``False`` then this can be left as ``None``.
+ identifier. If a manager instance will be connected to the
+ server using the :meth:`connect` method, or if the
+ *create_method* argument is ``False`` then this can be left as
+ ``None``.
*proxytype* is a subclass of :class:`BaseProxy` which is used to create
proxies for shared objects with this *typeid*. If ``None`` then a proxy
@@ -1232,6 +1290,14 @@ their parent process exits. The manager classes are defined in the
The address used by the manager.
+ .. versionchanged:: 3.3
+ Manager objects support the context manager protocol -- see
+ :ref:`typecontextmanager`. :meth:`__enter__` starts the server
+ process (if it has not already started) and then returns the
+ manager object. :meth:`__exit__` calls :meth:`shutdown`.
+
+ In previous versions :meth:`__enter__` did not start the
+ manager's server process if it was not already started.
.. class:: SyncManager
@@ -1241,6 +1307,13 @@ their parent process exits. The manager classes are defined in the
It also supports creation of shared lists and dictionaries.
+ .. method:: Barrier(parties[, action[, timeout]])
+
+ Create a shared :class:`threading.Barrier` object and return a
+ proxy for it.
+
+ .. versionadded:: 3.3
+
.. method:: BoundedSemaphore([value])
Create a shared :class:`threading.BoundedSemaphore` object and return a
@@ -1254,6 +1327,9 @@ their parent process exits. The manager classes are defined in the
If *lock* is supplied then it should be a proxy for a
:class:`threading.Lock` or :class:`threading.RLock` object.
+ .. versionchanged:: 3.3
+ The :meth:`wait_for` method was added.
+
.. method:: Event()
Create a shared :class:`threading.Event` object and return a proxy for it.
@@ -1359,11 +1435,10 @@ callables with the manager class. For example::
MyManager.register('Maths', MathsClass)
if __name__ == '__main__':
- manager = MyManager()
- manager.start()
- maths = manager.Maths()
- print(maths.add(4, 3)) # prints 7
- print(maths.mul(7, 8)) # prints 56
+ with MyManager() as manager:
+ maths = manager.Maths()
+ print(maths.add(4, 3)) # prints 7
+ print(maths.mul(7, 8)) # prints 56
Using a remote manager
@@ -1563,7 +1638,7 @@ Process Pools
One can create a pool of processes which will carry out tasks submitted to it
with the :class:`Pool` class.
-.. class:: multiprocessing.Pool([processes[, initializer[, initargs[, maxtasksperchild]]]])
+.. class:: Pool([processes[, initializer[, initargs[, maxtasksperchild]]]])
A process pool object which controls a pool of worker processes to which jobs
can be submitted. It supports asynchronous results with timeouts and
@@ -1658,6 +1733,24 @@ with the :class:`Pool` class.
returned iterator should be considered arbitrary. (Only when there is
only one worker process is the order guaranteed to be "correct".)
+ .. method:: starmap(func, iterable[, chunksize])
+
+ Like :meth:`map` except that the elements of the *iterable* are expected
+ to be iterables that are unpacked as arguments.
+
+ Hence an *iterable* of ``[(1,2), (3, 4)]`` results in ``[func(1,2),
+ func(3,4)]``.
+
+ .. versionadded:: 3.3
+
+ .. method:: starmap_async(func, iterable[, chunksize[, callback[, error_callback]]])
+
+ A combination of :meth:`starmap` and :meth:`map_async` that iterates over
+ *iterable* of iterables and calls *func* with the iterables unpacked.
+ Returns a result object.
+
+ .. versionadded:: 3.3
+
.. method:: close()
Prevents any more tasks from being submitted to the pool. Once all the
@@ -1674,6 +1767,11 @@ with the :class:`Pool` class.
Wait for the worker processes to exit. One must call :meth:`close` or
:meth:`terminate` before using :meth:`join`.
+ .. versionadded:: 3.3
+ Pool objects now support the context manager protocol -- see
+ :ref:`typecontextmanager`. :meth:`__enter__` returns the pool
+ object, and :meth:`__exit__` calls :meth:`terminate`.
+
.. class:: AsyncResult
@@ -1708,21 +1806,20 @@ The following example demonstrates the use of a pool::
return x*x
if __name__ == '__main__':
- pool = Pool(processes=4) # start 4 worker processes
+ with Pool(processes=4) as pool: # start 4 worker processes
+ result = pool.apply_async(f, (10,)) # evaluate "f(10)" asynchronously
+ print(result.get(timeout=1)) # prints "100" unless your computer is *very* slow
- result = pool.apply_async(f, (10,)) # evaluate "f(10)" asynchronously
- print(result.get(timeout=1)) # prints "100" unless your computer is *very* slow
+ print(pool.map(f, range(10))) # prints "[0, 1, 4,..., 81]"
- print(pool.map(f, range(10))) # prints "[0, 1, 4,..., 81]"
+ it = pool.imap(f, range(10))
+ print(next(it)) # prints "0"
+ print(next(it)) # prints "1"
+ print(it.next(timeout=1)) # prints "4" unless your computer is *very* slow
- it = pool.imap(f, range(10))
- print(next(it)) # prints "0"
- print(next(it)) # prints "1"
- print(it.next(timeout=1)) # prints "4" unless your computer is *very* slow
-
- import time
- result = pool.apply_async(time.sleep, (10,))
- print(result.get(timeout=1)) # raises TimeoutError
+ import time
+ result = pool.apply_async(time.sleep, (10,))
+ print(result.get(timeout=1)) # raises TimeoutError
.. _multiprocessing-listeners-clients:
@@ -1738,8 +1835,9 @@ Usually message passing between processes is done using queues or by using
However, the :mod:`multiprocessing.connection` module allows some extra
flexibility. It basically gives a high level message oriented API for dealing
-with sockets or Windows named pipes, and also has support for *digest
-authentication* using the :mod:`hmac` module.
+with sockets or Windows named pipes. It also has support for *digest
+authentication* using the :mod:`hmac` module, and for polling
+multiple connections at the same time.
.. function:: deliver_challenge(connection, authkey)
@@ -1749,15 +1847,15 @@ authentication* using the :mod:`hmac` module.
If the reply matches the digest of the message using *authkey* as the key
then a welcome message is sent to the other end of the connection. Otherwise
- :exc:`AuthenticationError` is raised.
+ :exc:`~multiprocessing.AuthenticationError` is raised.
.. function:: answerChallenge(connection, authkey)
Receive a message, calculate the digest of the message using *authkey* as the
key, and then send the digest back.
- If a welcome message is not received, then :exc:`AuthenticationError` is
- raised.
+ If a welcome message is not received, then
+ :exc:`~multiprocessing.AuthenticationError` is raised.
.. function:: Client(address[, family[, authenticate[, authkey]]])
@@ -1771,7 +1869,8 @@ authentication* using the :mod:`hmac` module.
If *authenticate* is ``True`` or *authkey* is a byte string then digest
authentication is used. The key used for authentication will be either
*authkey* or ``current_process().authkey`` if *authkey* is ``None``.
- If authentication fails then :exc:`AuthenticationError` is raised. See
+ If authentication fails then
+ :exc:`~multiprocessing.AuthenticationError` is raised. See
:ref:`multiprocessing-auth-keys`.
.. class:: Listener([address[, family[, backlog[, authenticate[, authkey]]]]])
@@ -1812,13 +1911,15 @@ authentication* using the :mod:`hmac` module.
``current_process().authkey`` is used as the authentication key. If
*authkey* is ``None`` and *authenticate* is ``False`` then no
authentication is done. If authentication fails then
- :exc:`AuthenticationError` is raised. See :ref:`multiprocessing-auth-keys`.
+ :exc:`~multiprocessing.AuthenticationError` is raised.
+ See :ref:`multiprocessing-auth-keys`.
.. method:: accept()
Accept a connection on the bound socket or named pipe of the listener
object and return a :class:`Connection` object. If authentication is
- attempted and fails, then :exc:`AuthenticationError` is raised.
+ attempted and fails, then
+ :exc:`~multiprocessing.AuthenticationError` is raised.
.. method:: close()
@@ -1837,12 +1938,44 @@ authentication* using the :mod:`hmac` module.
The address from which the last accepted connection came. If this is
unavailable then it is ``None``.
+ .. versionadded:: 3.3
+ Listener objects now support the context manager protocol -- see
+ :ref:`typecontextmanager`. :meth:`__enter__` returns the
+ listener object, and :meth:`__exit__` calls :meth:`close`.
-The module defines two exceptions:
+.. function:: wait(object_list, timeout=None)
-.. exception:: AuthenticationError
+ Wait till an object in *object_list* is ready. Returns the list of
+ those objects in *object_list* which are ready. If *timeout* is a
+ float then the call blocks for at most that many seconds. If
+ *timeout* is ``None`` then it will block for an unlimited period.
+ A negative timeout is equivalent to a zero timeout.
+
+ For both Unix and Windows, an object can appear in *object_list* if
+ it is
+
+ * a readable :class:`~multiprocessing.Connection` object;
+ * a connected and readable :class:`socket.socket` object; or
+ * the :attr:`~multiprocessing.Process.sentinel` attribute of a
+ :class:`~multiprocessing.Process` object.
+
+ A connection or socket object is ready when there is data available
+ to be read from it, or the other end has been closed.
+
+ **Unix**: ``wait(object_list, timeout)`` is almost equivalent to
+ ``select.select(object_list, [], [], timeout)``. The difference is
+ that, if :func:`select.select` is interrupted by a signal, it can
+ raise :exc:`OSError` with an error number of ``EINTR``, whereas
+ :func:`wait` will not.
+
+ **Windows**: An item in *object_list* must either be an integer
+ handle which is waitable (according to the definition used by the
+ documentation of the Win32 function ``WaitForMultipleObjects()``)
+ or it can be an object with a :meth:`fileno` method which returns a
+ socket handle or pipe handle. (Note that pipe handles and socket
+ handles are **not** waitable handles.)
- Exception raised when there is an authentication error.
+ .. versionadded:: 3.3
**Examples**
@@ -1855,19 +1988,16 @@ the client::
from array import array
address = ('localhost', 6000) # family is deduced to be 'AF_INET'
- listener = Listener(address, authkey=b'secret password')
- conn = listener.accept()
- print('connection accepted from', listener.last_accepted)
+ with Listener(address, authkey=b'secret password') as listener:
+ with listener.accept() as conn:
+ print('connection accepted from', listener.last_accepted)
- conn.send([2.25, None, 'junk', float])
+ conn.send([2.25, None, 'junk', float])
- conn.send_bytes(b'hello')
+ conn.send_bytes(b'hello')
- conn.send_bytes(array('i', [42, 1729]))
-
- conn.close()
- listener.close()
+ conn.send_bytes(array('i', [42, 1729]))
The following code connects to the server and receives some data from the
server::
@@ -1876,17 +2006,50 @@ server::
from array import array
address = ('localhost', 6000)
- conn = Client(address, authkey=b'secret password')
- print(conn.recv()) # => [2.25, None, 'junk', float]
+ with Client(address, authkey=b'secret password') as conn:
+ print(conn.recv()) # => [2.25, None, 'junk', float]
+
+ print(conn.recv_bytes()) # => 'hello'
+
+ arr = array('i', [0, 0, 0, 0, 0])
+ print(conn.recv_bytes_into(arr)) # => 8
+ print(arr) # => array('i', [42, 1729, 0, 0, 0])
+
+The following code uses :func:`~multiprocessing.connection.wait` to
+wait for messages from multiple processes at once::
- print(conn.recv_bytes()) # => 'hello'
+ import time, random
+ from multiprocessing import Process, Pipe, current_process
+ from multiprocessing.connection import wait
- arr = array('i', [0, 0, 0, 0, 0])
- print(conn.recv_bytes_into(arr)) # => 8
- print(arr) # => array('i', [42, 1729, 0, 0, 0])
+ def foo(w):
+ for i in range(10):
+ w.send((i, current_process().name))
+ w.close()
- conn.close()
+ if __name__ == '__main__':
+ readers = []
+
+ for i in range(4):
+ r, w = Pipe(duplex=False)
+ readers.append(r)
+ p = Process(target=foo, args=(w,))
+ p.start()
+ # We close the writable end of the pipe now to be sure that
+ # p is the only process which owns a handle for it. This
+ # ensures that when p closes its handle for the writable end,
+ # wait() will promptly report the readable end as being ready.
+ w.close()
+
+ while readers:
+ for r in wait(readers):
+ try:
+ msg = r.recv()
+ except EOFError:
+ readers.remove(r)
+ else:
+ print(msg)
.. _multiprocessing-address-formats:
@@ -2047,7 +2210,7 @@ Avoid shared state
It is probably best to stick to using queues or pipes for communication
between processes rather than using the lower level synchronization
- primitives from the :mod:`threading` module.
+ primitives.
Picklability
diff --git a/Doc/library/nntplib.rst b/Doc/library/nntplib.rst
index 247efb7..87a50b0 100644
--- a/Doc/library/nntplib.rst
+++ b/Doc/library/nntplib.rst
@@ -70,10 +70,23 @@ The module itself defines the following classes:
connecting to an NNTP server on the local machine and intend to call
reader-specific commands, such as ``group``. If you get unexpected
:exc:`NNTPPermanentError`\ s, you might need to set *readermode*.
+ The :class:`NNTP` class supports the :keyword:`with` statement to
+ unconditionally consume :exc:`socket.error` exceptions and to close the NNTP
+ connection when done. Here is an example of how to use it:
+
+ >>> from nntplib import NNTP
+ >>> with NNTP('news.gmane.org') as n:
+ ... n.group('gmane.comp.python.committers')
+ ...
+ ('211 1755 1 1755 gmane.comp.python.committers', 1755, 1, 1755, 'gmane.comp.python.committers')
+ >>>
+
.. versionchanged:: 3.2
*usenetrc* is now False by default.
+ .. versionchanged:: 3.3
+ Support for the :keyword:`with` statement was added.
.. class:: NNTP_SSL(host, port=563, user=None, password=None, ssl_context=None, readermode=None, usenetrc=False, [timeout])
@@ -504,6 +517,9 @@ them have been superseded by newer commands in :rfc:`3977`.
article with message ID *id*. Most of the time, this extension is not
enabled by NNTP server administrators.
+ .. deprecated:: 3.3
+ The XPATH extension is not actively used.
+
.. XXX deprecated:
diff --git a/Doc/library/numeric.rst b/Doc/library/numeric.rst
index ba22cb6..2732a84 100644
--- a/Doc/library/numeric.rst
+++ b/Doc/library/numeric.rst
@@ -8,9 +8,9 @@ Numeric and Mathematical Modules
The modules described in this chapter provide numeric and math-related functions
and data types. The :mod:`numbers` module defines an abstract hierarchy of
numeric types. The :mod:`math` and :mod:`cmath` modules contain various
-mathematical functions for floating-point and complex numbers. For users more
-interested in decimal accuracy than in speed, the :mod:`decimal` module supports
-exact representations of decimal numbers.
+mathematical functions for floating-point and complex numbers. The :mod:`decimal`
+module supports exact representations of decimal numbers, using arbitrary precision
+arithmetic.
The following modules are documented in this chapter:
diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst
index 214e27c..20a84b6 100644
--- a/Doc/library/os.path.rst
+++ b/Doc/library/os.path.rst
@@ -70,11 +70,16 @@ applications should use string objects to access all files.
.. function:: exists(path)
- Return ``True`` if *path* refers to an existing path. Returns ``False`` for
- broken symbolic links. On some platforms, this function may return ``False`` if
- permission is not granted to execute :func:`os.stat` on the requested file, even
+ Return ``True`` if *path* refers to an existing path or an open
+ file descriptor. Returns ``False`` for broken symbolic links. On
+ some platforms, this function may return ``False`` if permission is
+ not granted to execute :func:`os.stat` on the requested file, even
if the *path* physically exists.
+ .. versionchanged:: 3.3
+ *path* can now be an integer: ``True`` is returned if it is an
+ open file descriptor, ``False`` otherwise.
+
.. function:: lexists(path)
@@ -254,7 +259,8 @@ applications should use string objects to access all files.
Availability: Unix, Windows.
- .. versionchanged:: 3.2 Added Windows support.
+ .. versionchanged:: 3.2
+ Added Windows support.
.. function:: samestat(stat1, stat2)
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index 410e03a..2e45de2 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -96,6 +96,13 @@ These functions and data items provide information and operate on the current
process and user.
+.. function:: ctermid()
+
+ Return the filename corresponding to the controlling terminal of the process.
+
+ Availability: Unix.
+
+
.. data:: environ
A mapping object representing the string environment. For example,
@@ -177,6 +184,28 @@ process and user.
.. versionadded:: 3.2
+.. function:: getenv(key, default=None)
+
+ Return the value of the environment variable *key* if it exists, or
+ *default* if it doesn't. *key*, *default* and the result are str.
+
+ On Unix, keys and values are decoded with :func:`sys.getfilesystemencoding`
+ and ``'surrogateescape'`` error handler. Use :func:`os.getenvb` if you
+ would like to use a different encoding.
+
+ Availability: most flavors of Unix, Windows.
+
+
+.. function:: getenvb(key, default=None)
+
+ Return the value of the environment variable *key* if it exists, or
+ *default* if it doesn't. *key*, *default* and the result are bytes.
+
+ Availability: most flavors of Unix.
+
+ .. versionadded:: 3.2
+
+
.. function:: get_exec_path(env=None)
Returns the list of directories that will be searched for a named
@@ -188,13 +217,6 @@ process and user.
.. versionadded:: 3.2
-.. function:: ctermid()
-
- Return the filename corresponding to the controlling terminal of the process.
-
- Availability: Unix.
-
-
.. function:: getegid()
Return the effective group id of the current process. This corresponds to the
@@ -221,6 +243,17 @@ process and user.
Availability: Unix.
+.. function:: getgrouplist(user, group)
+
+ Return list of group ids that *user* belongs to. If *group* is not in the
+ list, it is included; typically, *group* is specified as the group ID
+ field from the password record for *user*.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
.. function:: getgroups()
Return list of supplemental group ids associated with the current process.
@@ -242,17 +275,6 @@ process and user.
obtained with :func:`sysconfig.get_config_var`.
-.. function:: initgroups(username, gid)
-
- Call the system initgroups() to initialize the group access list with all of
- the groups of which the specified username is a member, plus the specified
- group id.
-
- Availability: Unix.
-
- .. versionadded:: 3.2
-
-
.. function:: getlogin()
Return the name of the user logged in on the controlling terminal of the
@@ -297,11 +319,40 @@ process and user.
the id returned is the one of the init process (1), on Windows it is still
the same id, which may be already reused by another process.
- Availability: Unix, Windows
+ Availability: Unix, Windows.
.. versionchanged:: 3.2
Added support for Windows.
+
+.. function:: getpriority(which, who)
+
+ .. index:: single: process; scheduling priority
+
+ Get program scheduling priority. The value *which* is one of
+ :const:`PRIO_PROCESS`, :const:`PRIO_PGRP`, or :const:`PRIO_USER`, and *who*
+ is interpreted relative to *which* (a process identifier for
+ :const:`PRIO_PROCESS`, process group identifier for :const:`PRIO_PGRP`, and a
+ user ID for :const:`PRIO_USER`). A zero value for *who* denotes
+ (respectively) the calling process, the process group of the calling process,
+ or the real user ID of the calling process.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. data:: PRIO_PROCESS
+ PRIO_PGRP
+ PRIO_USER
+
+ Parameters for the :func:`getpriority` and :func:`setpriority` functions.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
.. function:: getresuid()
Return a tuple (ruid, euid, suid) denoting the current process's
@@ -331,24 +382,13 @@ process and user.
Availability: Unix.
-.. function:: getenv(key, default=None)
-
- Return the value of the environment variable *key* if it exists, or
- *default* if it doesn't. *key*, *default* and the result are str.
-
- On Unix, keys and values are decoded with :func:`sys.getfilesystemencoding`
- and ``'surrogateescape'`` error handler. Use :func:`os.getenvb` if you
- would like to use a different encoding.
-
- Availability: most flavors of Unix, Windows.
-
-
-.. function:: getenvb(key, default=None)
+.. function:: initgroups(username, gid)
- Return the value of the environment variable *key* if it exists, or
- *default* if it doesn't. *key*, *default* and the result are bytes.
+ Call the system initgroups() to initialize the group access list with all of
+ the groups of which the specified username is a member, plus the specified
+ group id.
- Availability: most flavors of Unix.
+ Availability: Unix.
.. versionadded:: 3.2
@@ -410,7 +450,7 @@ process and user.
.. function:: setpgrp()
- Call the system call :c:func:`setpgrp` or :c:func:`setpgrp(0, 0)` depending on
+ Call the system call :c:func:`setpgrp` or ``setpgrp(0, 0)`` depending on
which version is implemented (if any). See the Unix manual for the semantics.
Availability: Unix.
@@ -425,6 +465,25 @@ process and user.
Availability: Unix.
+.. function:: setpriority(which, who, priority)
+
+ .. index:: single: process; scheduling priority
+
+ Set program scheduling priority. The value *which* is one of
+ :const:`PRIO_PROCESS`, :const:`PRIO_PGRP`, or :const:`PRIO_USER`, and *who*
+ is interpreted relative to *which* (a process identifier for
+ :const:`PRIO_PROCESS`, process group identifier for :const:`PRIO_PGRP`, and a
+ user ID for :const:`PRIO_USER`). A zero value for *who* denotes
+ (respectively) the calling process, the process group of the calling process,
+ or the real user ID of the calling process.
+ *priority* is a value in the range -20 to 19. The default priority is 0;
+ lower priorities cause more favorable scheduling.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
.. function:: setregid(rgid, egid)
Set the current process's real and effective group ids.
@@ -511,15 +570,31 @@ process and user.
single: gethostname() (in module socket)
single: gethostbyaddr() (in module socket)
- Return a 5-tuple containing information identifying the current operating
- system. The tuple contains 5 strings: ``(sysname, nodename, release, version,
- machine)``. Some systems truncate the nodename to 8 characters or to the
+ Returns information identifying the current operating system.
+ The return value is an object with five attributes:
+
+ * :attr:`sysname` - operating system name
+ * :attr:`nodename` - name of machine on network (implementation-defined)
+ * :attr:`release` - operating system release
+ * :attr:`version` - operating system version
+ * :attr:`machine` - hardware identifier
+
+ For backwards compatibility, this object is also iterable, behaving
+ like a five-tuple containing :attr:`sysname`, :attr:`nodename`,
+ :attr:`release`, :attr:`version`, and :attr:`machine`
+ in that order.
+
+ Some systems truncate :attr:`nodename` to 8 characters or to the
leading component; a better way to get the hostname is
:func:`socket.gethostname` or even
``socket.gethostbyaddr(socket.gethostname())``.
Availability: recent flavors of Unix.
+ .. versionchanged:: 3.3
+ Return type changed from a tuple to a tuple-like object
+ with named attributes.
+
.. function:: unsetenv(key)
@@ -542,15 +617,16 @@ process and user.
File Object Creation
--------------------
-These functions create new :term:`file objects <file object>`. (See also :func:`open`.)
+This function creates new :term:`file objects <file object>`. (See also
+:func:`~os.open` for opening file descriptors.)
.. function:: fdopen(fd, *args, **kwargs)
- Return an open file object connected to the file descriptor *fd*.
- This is an alias of :func:`open` and accepts the same arguments.
- The only difference is that the first argument of :func:`fdopen`
- must always be an integer.
+ Return an open file object connected to the file descriptor *fd*. This is an
+ alias of the :func:`open` built-in function and accepts the same arguments.
+ The only difference is that the first argument of :func:`fdopen` must always
+ be an integer.
.. _os-fd-ops:
@@ -572,6 +648,7 @@ associated with a :term:`file object` when required. Note that using the file
descriptor directly will bypass the file object methods, ignoring aspects such
as internal buffering of data.
+
.. function:: close(fd)
Close file descriptor *fd*.
@@ -589,7 +666,7 @@ as internal buffering of data.
.. function:: closerange(fd_low, fd_high)
Close all file descriptors from *fd_low* (inclusive) to *fd_high* (exclusive),
- ignoring errors. Equivalent to::
+ ignoring errors. Equivalent to (but much faster than)::
for fd in range(fd_low, fd_high):
try:
@@ -622,8 +699,9 @@ as internal buffering of data.
.. function:: fchmod(fd, mode)
- Change the mode of the file given by *fd* to the numeric *mode*. See the docs
- for :func:`chmod` for possible values of *mode*.
+ Change the mode of the file given by *fd* to the numeric *mode*. See the
+ docs for :func:`chmod` for possible values of *mode*. As of Python 3.3, this
+ is equivalent to ``os.chmod(fd, mode)``.
Availability: Unix.
@@ -631,7 +709,9 @@ as internal buffering of data.
.. function:: fchown(fd, uid, gid)
Change the owner and group id of the file given by *fd* to the numeric *uid*
- and *gid*. To leave one of the ids unchanged, set it to -1.
+ and *gid*. To leave one of the ids unchanged, set it to -1. See
+ :func:`chown`. As of Python 3.3, this is equivalent to ``os.chown(fd, uid,
+ gid)``.
Availability: Unix.
@@ -662,20 +742,24 @@ as internal buffering of data.
included in ``pathconf_names``, an :exc:`OSError` is raised with
:const:`errno.EINVAL` for the error number.
+ As of Python 3.3, this is equivalent to ``os.pathconf(fd, name)``.
+
Availability: Unix.
.. function:: fstat(fd)
- Return status for file descriptor *fd*, like :func:`~os.stat`.
+ Return status for file descriptor *fd*, like :func:`~os.stat`. As of Python
+ 3.3, this is equivalent to ``os.stat(fd)``.
Availability: Unix, Windows.
.. function:: fstatvfs(fd)
- Return information about the filesystem containing the file associated with file
- descriptor *fd*, like :func:`statvfs`.
+ Return information about the filesystem containing the file associated with
+ file descriptor *fd*, like :func:`statvfs`. As of Python 3.3, this is
+ equivalent to ``os.statvfs(fd)``.
Availability: Unix.
@@ -689,13 +773,14 @@ as internal buffering of data.
``f.flush()``, and then do ``os.fsync(f.fileno())``, to ensure that all internal
buffers associated with *f* are written to disk.
- Availability: Unix, and Windows.
+ Availability: Unix, Windows.
.. function:: ftruncate(fd, length)
- Truncate the file corresponding to file descriptor *fd*, so that it is at most
- *length* bytes in size.
+ Truncate the file corresponding to file descriptor *fd*, so that it is at
+ most *length* bytes in size. As of Python 3.3, this is equivalent to
+ ``os.truncate(fd, length)``.
Availability: Unix.
@@ -708,6 +793,31 @@ as internal buffering of data.
Availability: Unix.
+.. function:: lockf(fd, cmd, len)
+
+ Apply, test or remove a POSIX lock on an open file descriptor.
+ *fd* is an open file descriptor.
+ *cmd* specifies the command to use - one of :data:`F_LOCK`, :data:`F_TLOCK`,
+ :data:`F_ULOCK` or :data:`F_TEST`.
+ *len* specifies the section of the file to lock.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. data:: F_LOCK
+ F_TLOCK
+ F_ULOCK
+ F_TEST
+
+ Flags that specify what action :func:`lockf` will take.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
.. function:: lseek(fd, pos, how)
Set the current position of file descriptor *fd* to position *pos*, modified
@@ -724,21 +834,29 @@ as internal buffering of data.
SEEK_END
Parameters to the :func:`lseek` function. Their values are 0, 1, and 2,
- respectively. Availability: Windows, Unix.
+ respectively.
+
+ Availability: Unix, Windows.
+ .. versionadded:: 3.3
+ Some operating systems could support additional values, like
+ :data:`os.SEEK_HOLE` or :data:`os.SEEK_DATA`.
-.. function:: open(file, flags[, mode])
+
+.. function:: open(file, flags, mode=0o777, *, dir_fd=None)
Open the file *file* and set various flags according to *flags* and possibly
- its mode according to *mode*. The default *mode* is ``0o777`` (octal), and
- the current umask value is first masked out. Return the file descriptor for
- the newly opened file.
+ its mode according to *mode*. When computing *mode*, the current umask value
+ is first masked out. Return the file descriptor for the newly opened file.
For a description of the flag and mode values, see the C run-time documentation;
flag constants (like :const:`O_RDONLY` and :const:`O_WRONLY`) are defined in
this module too (see :ref:`open-constants`). In particular, on Windows adding
:const:`O_BINARY` is needed to open files in binary mode.
+ This function can support :ref:`paths relative to directory descriptors
+ <dir_fd>`.
+
Availability: Unix, Windows.
.. note::
@@ -748,6 +866,9 @@ as internal buffering of data.
:meth:`~file.read` and :meth:`~file.write` methods (and many more). To
wrap a file descriptor in a file object, use :func:`fdopen`.
+ .. versionadded:: 3.3
+ The *dir_fd* argument.
+
.. function:: openpty()
@@ -768,6 +889,79 @@ as internal buffering of data.
Availability: Unix, Windows.
+.. function:: pipe2(flags)
+
+ Create a pipe with *flags* set atomically.
+ *flags* can be constructed by ORing together one or more of these values:
+ :data:`O_NONBLOCK`, :data:`O_CLOEXEC`.
+ Return a pair of file descriptors ``(r, w)`` usable for reading and writing,
+ respectively.
+
+ Availability: some flavors of Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: posix_fallocate(fd, offset, len)
+
+ Ensures that enough disk space is allocated for the file specified by *fd*
+ starting from *offset* and continuing for *len* bytes.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: posix_fadvise(fd, offset, len, advice)
+
+ Announces an intention to access data in a specific pattern thus allowing
+ the kernel to make optimizations.
+ The advice applies to the region of the file specified by *fd* starting at
+ *offset* and continuing for *len* bytes.
+ *advice* is one of :data:`POSIX_FADV_NORMAL`, :data:`POSIX_FADV_SEQUENTIAL`,
+ :data:`POSIX_FADV_RANDOM`, :data:`POSIX_FADV_NOREUSE`,
+ :data:`POSIX_FADV_WILLNEED` or :data:`POSIX_FADV_DONTNEED`.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. data:: POSIX_FADV_NORMAL
+ POSIX_FADV_SEQUENTIAL
+ POSIX_FADV_RANDOM
+ POSIX_FADV_NOREUSE
+ POSIX_FADV_WILLNEED
+ POSIX_FADV_DONTNEED
+
+ Flags that can be used in *advice* in :func:`posix_fadvise` that specify
+ the access pattern that is likely to be used.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: pread(fd, buffersize, offset)
+
+ Read from a file descriptor, *fd*, at a position of *offset*. It will read up
+ to *buffersize* number of bytes. The file offset remains unchanged.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: pwrite(fd, string, offset)
+
+ Write *string* to a file descriptor, *fd*, from *offset*, leaving the file
+ offset unchanged.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
.. function:: read(fd, n)
Read at most *n* bytes from file descriptor *fd*. Return a bytestring containing the
@@ -779,10 +973,62 @@ as internal buffering of data.
.. note::
This function is intended for low-level I/O and must be applied to a file
- descriptor as returned by :func:`os.open` or :func:`pipe`. To read a "file object"
- returned by the built-in function :func:`open` or by :func:`popen` or
- :func:`fdopen`, or :data:`sys.stdin`, use its :meth:`~file.read` or
- :meth:`~file.readline` methods.
+ descriptor as returned by :func:`os.open` or :func:`pipe`. To read a
+ "file object" returned by the built-in function :func:`open` or by
+ :func:`popen` or :func:`fdopen`, or :data:`sys.stdin`, use its
+ :meth:`~file.read` or :meth:`~file.readline` methods.
+
+
+.. function:: sendfile(out, in, offset, nbytes)
+ sendfile(out, in, offset, nbytes, headers=None, trailers=None, flags=0)
+
+ Copy *nbytes* bytes from file descriptor *in* to file descriptor *out*
+ starting at *offset*.
+ Return the number of bytes sent. When EOF is reached, return 0.
+
+ The first function notation is supported by all platforms that define
+ :func:`sendfile`.
+
+ On Linux, if *offset* is given as ``None``, the bytes are read from the
+ current position of *in* and the position of *in* is updated.
+
+ The second case may be used on Mac OS X and FreeBSD where *headers* and
+ *trailers* are arbitrary sequences of buffers that are written before and
+ after the data from *in* is written. It returns the same as the first case.
+
+ On Mac OS X and FreeBSD, a value of 0 for *nbytes* specifies to send until
+ the end of *in* is reached.
+
+ On Solaris, *out* may be the file descriptor of a regular file or the file
+ descriptor of a socket. On all other platforms, *out* must be the file
+ descriptor of an open socket.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. data:: SF_NODISKIO
+ SF_MNOWAIT
+ SF_SYNC
+
+ Parameters to the :func:`sendfile` function, if the implementation supports
+ them.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: readv(fd, buffers)
+
+ Read from a file descriptor into a number of writable buffers. *buffers* is
+ an arbitrary sequence of writable buffers. Returns the total number of bytes
+ read.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
.. function:: tcgetpgrp(fd)
@@ -826,6 +1072,17 @@ as internal buffering of data.
:meth:`~file.write` method.
+.. function:: writev(fd, buffers)
+
+ Write the contents of *buffers* to file descriptor *fd*, where *buffers*
+ is an arbitrary sequence of buffers.
+ Returns the total number of bytes written.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
.. _open-constants:
``open()`` flag constants
@@ -857,9 +1114,12 @@ or `the MSDN <http://msdn.microsoft.com/en-us/library/z0kc8e3z.aspx>`_ on Window
O_NOCTTY
O_SHLOCK
O_EXLOCK
+ O_CLOEXEC
These constants are only available on Unix.
+ .. versionchanged:: 3.3
+ Add :data:`O_CLOEXEC` constant.
.. data:: O_BINARY
O_NOINHERIT
@@ -882,12 +1142,106 @@ or `the MSDN <http://msdn.microsoft.com/en-us/library/z0kc8e3z.aspx>`_ on Window
the C library.
+.. data:: RTLD_LAZY
+ RTLD_NOW
+ RTLD_GLOBAL
+ RTLD_LOCAL
+ RTLD_NODELETE
+ RTLD_NOLOAD
+ RTLD_DEEPBIND
+
+ See the Unix manual page :manpage:`dlopen(3)`.
+
+ .. versionadded:: 3.3
+
+
+.. _terminal-size:
+
+Querying the size of a terminal
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 3.3
+
+.. function:: get_terminal_size(fd=STDOUT_FILENO)
+
+ Return the size of the terminal window as ``(columns, lines)``,
+ a tuple of type :class:`terminal_size`.
+
+ The optional argument ``fd`` (default ``STDOUT_FILENO``, or standard
+ output) specifies which file descriptor should be queried.
+
+ If the file descriptor is not connected to a terminal, an :exc:`OSError`
+ is raised.
+
+ :func:`shutil.get_terminal_size` is the high-level function which
+ should normally be used; ``os.get_terminal_size`` is the low-level
+ implementation.
+
+ Availability: Unix, Windows.
+
+.. class:: terminal_size
+
+ A subclass of tuple, holding ``(columns, lines)`` of the terminal window size.
+
+ .. attribute:: columns
+
+ Width of the terminal window in characters.
+
+ .. attribute:: lines
+
+ Height of the terminal window in characters.
+
+
.. _os-file-dir:
Files and Directories
---------------------
-.. function:: access(path, mode)
+On some Unix platforms, many of these functions support one or more of these
+features:
+
+.. _path_fd:
+
+* **specifying a file descriptor:**
+ For some functions, the *path* argument can be not only a string giving a path
+ name, but also a file descriptor. The function will then operate on the file
+ referred to by the descriptor. (For POSIX systems, Python will call the
+ ``f...`` version of the function.)
+
+ You can check whether or not *path* can be specified as a file descriptor on
+ your platform using :data:`os.supports_fd`. If it is unavailable, using it
+ will raise a :exc:`NotImplementedError`.
+
+ If the function also supports *dir_fd* or *follow_symlinks* arguments, it is
+ an error to specify one of those when supplying *path* as a file descriptor.
+
+.. _dir_fd:
+
+* **paths relative to directory descriptors:** If *dir_fd* is not ``None``, it
+ should be a file descriptor referring to a directory, and the path to operate
+ on should be relative; path will then be relative to that directory. If the
+ path is absolute, *dir_fd* is ignored. (For POSIX systems, Python will call
+ the ``...at`` or ``f...at`` version of the function.)
+
+ You can check whether or not *dir_fd* is supported on your platform using
+ :data:`os.supports_dir_fd`. If it is unavailable, using it will raise a
+ :exc:`NotImplementedError`.
+
+.. _follow_symlinks:
+
+* **not following symlinks:** If *follow_symlinks* is
+ ``False``, and the last element of the path to operate on is a symbolic link,
+ the function will operate on the symbolic link itself instead of the file the
+ link points to. (For POSIX systems, Python will call the ``l...`` version of
+ the function.)
+
+ You can check whether or not *follow_symlinks* is supported on your platform
+ using :data:`os.supports_follow_symlinks`. If it is unavailable, using it
+ will raise a :exc:`NotImplementedError`.
+
+
+
+.. function:: access(path, mode, *, dir_fd=None, effective_ids=False, follow_symlinks=True)
Use the real uid/gid to test for access to *path*. Note that most operations
will use the effective uid/gid, therefore this routine can be used in a
@@ -898,6 +1252,15 @@ Files and Directories
:const:`False` if not. See the Unix man page :manpage:`access(2)` for more
information.
+ This function can support specifying :ref:`paths relative to directory
+ descriptors <dir_fd>` and :ref:`not following symlinks <follow_symlinks>`.
+
+ If *effective_ids* is ``True``, :func:`access` will perform its access
+ checks using the effective uid/gid instead of the real uid/gid.
+ *effective_ids* may not be supported on your platform; you can check whether
+ or not it is available using :data:`os.supports_effective_ids`. If it is
+ unavailable, using it will raise a :exc:`NotImplementedError`.
+
Availability: Unix, Windows.
.. note::
@@ -917,11 +1280,8 @@ Files and Directories
try:
fp = open("myfile")
- except IOError as e:
- if e.errno == errno.EACCES:
- return "some default data"
- # Not a permission error.
- raise
+ except PermissionError:
+ return "some default data"
else:
with fp:
return fp.read()
@@ -932,29 +1292,18 @@ Files and Directories
succeed, particularly for operations on network filesystems which may have
permissions semantics beyond the usual POSIX permission-bit model.
+ .. versionchanged:: 3.3
+ Added the *dir_fd*, *effective_ids*, and *follow_symlinks* parameters.
-.. data:: F_OK
-
- Value to pass as the *mode* parameter of :func:`access` to test the existence of
- *path*.
-
-
-.. data:: R_OK
-
- Value to include in the *mode* parameter of :func:`access` to test the
- readability of *path*.
+.. data:: F_OK
+ R_OK
+ W_OK
+ X_OK
-.. data:: W_OK
-
- Value to include in the *mode* parameter of :func:`access` to test the
- writability of *path*.
-
-
-.. data:: X_OK
-
- Value to include in the *mode* parameter of :func:`access` to determine if
- *path* can be executed.
+ Values to pass as the *mode* parameter of :func:`access` to test the
+ existence, readability, writability and executability of *path*,
+ respectively.
.. function:: chdir(path)
@@ -963,33 +1312,17 @@ Files and Directories
Change the current working directory to *path*.
- Availability: Unix, Windows.
-
-
-.. function:: fchdir(fd)
-
- Change the current working directory to the directory represented by the file
- descriptor *fd*. The descriptor must refer to an opened directory, not an open
- file.
-
- Availability: Unix.
-
-
-.. function:: getcwd()
-
- Return a string representing the current working directory.
+ This function can support :ref:`specifying a file descriptor <path_fd>`. The
+ descriptor must refer to an opened directory, not an open file.
Availability: Unix, Windows.
+ .. versionadded:: 3.3
+ Added support for specifying *path* as a file descriptor
+ on some platforms.
-.. function:: getcwdb()
-
- Return a bytestring representing the current working directory.
-
- Availability: Unix, Windows.
-
-.. function:: chflags(path, flags)
+.. function:: chflags(path, flags, *, follow_symlinks=True)
Set the flags of *path* to the numeric *flags*. *flags* may take a combination
(bitwise OR) of the following values (as defined in the :mod:`stat` module):
@@ -1007,16 +1340,15 @@ Files and Directories
* :data:`stat.SF_NOUNLINK`
* :data:`stat.SF_SNAPSHOT`
- Availability: Unix.
-
+ This function can support :ref:`not following symlinks <follow_symlinks>`.
-.. function:: chroot(path)
+ Availability: Unix.
- Change the root directory of the current process to *path*. Availability:
- Unix.
+ .. versionadded:: 3.3
+ The *follow_symlinks* argument.
-.. function:: chmod(path, mode)
+.. function:: chmod(path, mode, *, dir_fd=None, follow_symlinks=True)
Change the mode of *path* to the numeric *mode*. *mode* may take one of the
following values (as defined in the :mod:`stat` module) or bitwise ORed
@@ -1042,28 +1374,77 @@ Files and Directories
* :data:`stat.S_IWOTH`
* :data:`stat.S_IXOTH`
+ This function can support :ref:`specifying a file descriptor <path_fd>`,
+ :ref:`paths relative to directory descriptors <dir_fd>` and :ref:`not
+ following symlinks <follow_symlinks>`.
+
Availability: Unix, Windows.
.. note::
- Although Windows supports :func:`chmod`, you can only set the file's read-only
- flag with it (via the ``stat.S_IWRITE`` and ``stat.S_IREAD``
- constants or a corresponding integer value). All other bits are
- ignored.
+ Although Windows supports :func:`chmod`, you can only set the file's
+ read-only flag with it (via the ``stat.S_IWRITE`` and ``stat.S_IREAD``
+ constants or a corresponding integer value). All other bits are ignored.
+
+ .. versionadded:: 3.3
+ Added support for specifying *path* as an open file descriptor,
+ and the *dir_fd* and *follow_symlinks* arguments.
+
+
+.. function:: chown(path, uid, gid, *, dir_fd=None, follow_symlinks=True)
+
+ Change the owner and group id of *path* to the numeric *uid* and *gid*. To
+ leave one of the ids unchanged, set it to -1.
+
+ This function can support :ref:`specifying a file descriptor <path_fd>`,
+ :ref:`paths relative to directory descriptors <dir_fd>` and :ref:`not
+ following symlinks <follow_symlinks>`.
+
+ See :func:`shutil.chown` for a higher-level function that accepts names in
+ addition to numeric ids.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+ Added support for specifying an open file descriptor for *path*,
+ and the *dir_fd* and *follow_symlinks* arguments.
+
+
+.. function:: chroot(path)
+
+ Change the root directory of the current process to *path*.
+ Availability: Unix.
-.. function:: chown(path, uid, gid)
- Change the owner and group id of *path* to the numeric *uid* and *gid*. To leave
- one of the ids unchanged, set it to -1.
+.. function:: fchdir(fd)
+
+ Change the current working directory to the directory represented by the file
+ descriptor *fd*. The descriptor must refer to an opened directory, not an
+ open file. As of Python 3.3, this is equivalent to ``os.chdir(fd)``.
Availability: Unix.
+.. function:: getcwd()
+
+ Return a string representing the current working directory.
+
+ Availability: Unix, Windows.
+
+
+.. function:: getcwdb()
+
+ Return a bytestring representing the current working directory.
+
+ Availability: Unix, Windows.
+
+
.. function:: lchflags(path, flags)
- Set the flags of *path* to the numeric *flags*, like :func:`chflags`, but do not
- follow symbolic links.
+ Set the flags of *path* to the numeric *flags*, like :func:`chflags`, but do
+ not follow symbolic links. As of Python 3.3, this is equivalent to
+ ``os.chflags(path, flags, follow_symlinks=False)``.
Availability: Unix.
@@ -1071,110 +1452,101 @@ Files and Directories
.. function:: lchmod(path, mode)
Change the mode of *path* to the numeric *mode*. If path is a symlink, this
- affects the symlink rather than the target. See the docs for :func:`chmod`
- for possible values of *mode*.
+ affects the symlink rather than the target. See the docs for :func:`chmod`
+ for possible values of *mode*. As of Python 3.3, this is equivalent to
+ ``os.chmod(path, mode, follow_symlinks=False)``.
Availability: Unix.
.. function:: lchown(path, uid, gid)
- Change the owner and group id of *path* to the numeric *uid* and *gid*. This
- function will not follow symbolic links.
+ Change the owner and group id of *path* to the numeric *uid* and *gid*. This
+ function will not follow symbolic links. As of Python 3.3, this is equivalent
+ to ``os.chown(path, uid, gid, follow_symlinks=False)``.
Availability: Unix.
-.. function:: link(source, link_name)
+.. function:: link(src, dst, *, src_dir_fd=None, dst_dir_fd=None, follow_symlinks=True)
+
+ Create a hard link pointing to *src* named *dst*.
- Create a hard link pointing to *source* named *link_name*.
+ This function can support specifying *src_dir_fd* and/or *dst_dir_fd* to
+ supply :ref:`paths relative to directory descriptors <dir_fd>`, and :ref:`not
+ following symlinks <follow_symlinks>`.
Availability: Unix, Windows.
.. versionchanged:: 3.2
Added Windows support.
+ .. versionadded:: 3.3
+ Added the *src_dir_fd*, *dst_dir_fd*, and *follow_symlinks* arguments.
+
.. function:: listdir(path='.')
Return a list containing the names of the entries in the directory given by
- *path* (default: ``'.'``). The list is in arbitrary order. It does not include the special
+ *path*. The list is in arbitrary order, and does not include the special
entries ``'.'`` and ``'..'`` even if they are present in the directory.
- This function can be called with a bytes or string argument, and returns
- filenames of the same datatype.
+ *path* may be either of type ``str`` or of type ``bytes``. If *path*
+ is of type ``bytes``, the filenames returned will also be of type ``bytes``;
+ in all other circumstances, they will be of type ``str``.
+
+ This function can also support :ref:`specifying a file descriptor
+ <path_fd>`; the file descriptor must refer to a directory.
+
+ .. note::
+ To encode ``str`` filenames to ``bytes``, use :func:`~os.fsencode`.
Availability: Unix, Windows.
.. versionchanged:: 3.2
The *path* parameter became optional.
-.. function:: lstat(path)
+ .. versionadded:: 3.3
+ Added support for specifying an open file descriptor for *path*.
+
+
+.. function:: lstat(path, *, dir_fd=None)
Perform the equivalent of an :c:func:`lstat` system call on the given path.
Similar to :func:`~os.stat`, but does not follow symbolic links. On
platforms that do not support symbolic links, this is an alias for
- :func:`~os.stat`.
+ :func:`~os.stat`. As of Python 3.3, this is equivalent to ``os.stat(path,
+ dir_fd=dir_fd, follow_symlinks=False)``.
+
+ This function can also support :ref:`paths relative to directory descriptors
+ <dir_fd>`.
.. versionchanged:: 3.2
Added support for Windows 6.0 (Vista) symbolic links.
+ .. versionchanged:: 3.3
+ Added the *dir_fd* parameter.
-.. function:: mkfifo(path[, mode])
-
- Create a FIFO (a named pipe) named *path* with numeric mode *mode*. The
- default *mode* is ``0o666`` (octal). The current umask value is first masked
- out from the mode.
-
- FIFOs are pipes that can be accessed like regular files. FIFOs exist until they
- are deleted (for example with :func:`os.unlink`). Generally, FIFOs are used as
- rendezvous between "client" and "server" type processes: the server opens the
- FIFO for reading, and the client opens it for writing. Note that :func:`mkfifo`
- doesn't open the FIFO --- it just creates the rendezvous point.
-
- Availability: Unix.
-
-
-.. function:: mknod(filename[, mode=0o600[, device=0]])
-
- Create a filesystem node (file, device special file or named pipe) named
- *filename*. *mode* specifies both the permissions to use and the type of node
- to be created, being combined (bitwise OR) with one of ``stat.S_IFREG``,
- ``stat.S_IFCHR``, ``stat.S_IFBLK``, and ``stat.S_IFIFO`` (those constants are
- available in :mod:`stat`). For ``stat.S_IFCHR`` and ``stat.S_IFBLK``,
- *device* defines the newly created device special file (probably using
- :func:`os.makedev`), otherwise it is ignored.
-
-
-.. function:: major(device)
-
- Extract the device major number from a raw device number (usually the
- :attr:`st_dev` or :attr:`st_rdev` field from :c:type:`stat`).
-
-
-.. function:: minor(device)
-
- Extract the device minor number from a raw device number (usually the
- :attr:`st_dev` or :attr:`st_rdev` field from :c:type:`stat`).
-
-
-.. function:: makedev(major, minor)
- Compose a raw device number from the major and minor device numbers.
+.. function:: mkdir(path, mode=0o777, *, dir_fd=None)
+ Create a directory named *path* with numeric mode *mode*.
-.. function:: mkdir(path[, mode])
+ On some systems, *mode* is ignored. Where it is used, the current umask
+ value is first masked out. If the directory already exists, :exc:`OSError`
+ is raised.
- Create a directory named *path* with numeric mode *mode*. The default *mode*
- is ``0o777`` (octal). On some systems, *mode* is ignored. Where it is used,
- the current umask value is first masked out. If the directory already
- exists, :exc:`OSError` is raised.
+ This function can also support :ref:`paths relative to directory descriptors
+ <dir_fd>`.
It is also possible to create temporary directories; see the
:mod:`tempfile` module's :func:`tempfile.mkdtemp` function.
Availability: Unix, Windows.
+ .. versionadded:: 3.3
+ The *dir_fd* argument.
+
.. function:: makedirs(path, mode=0o777, exist_ok=False)
@@ -1205,6 +1577,60 @@ Files and Directories
The *exist_ok* parameter.
+.. function:: mkfifo(path, mode=0o666, *, dir_fd=None)
+
+ Create a FIFO (a named pipe) named *path* with numeric mode *mode*.
+ The current umask value is first masked out from the mode.
+
+ This function can also support :ref:`paths relative to directory descriptors
+ <dir_fd>`.
+
+ FIFOs are pipes that can be accessed like regular files. FIFOs exist until they
+ are deleted (for example with :func:`os.unlink`). Generally, FIFOs are used as
+ rendezvous between "client" and "server" type processes: the server opens the
+ FIFO for reading, and the client opens it for writing. Note that :func:`mkfifo`
+ doesn't open the FIFO --- it just creates the rendezvous point.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+ The *dir_fd* argument.
+
+
+.. function:: mknod(filename, mode=0o600, device=0, *, dir_fd=None)
+
+ Create a filesystem node (file, device special file or named pipe) named
+ *filename*. *mode* specifies both the permissions to use and the type of node
+ to be created, being combined (bitwise OR) with one of ``stat.S_IFREG``,
+ ``stat.S_IFCHR``, ``stat.S_IFBLK``, and ``stat.S_IFIFO`` (those constants are
+ available in :mod:`stat`). For ``stat.S_IFCHR`` and ``stat.S_IFBLK``,
+ *device* defines the newly created device special file (probably using
+ :func:`os.makedev`), otherwise it is ignored.
+
+ This function can also support :ref:`paths relative to directory descriptors
+ <dir_fd>`.
+
+ .. versionadded:: 3.3
+ The *dir_fd* argument.
+
+
+.. function:: major(device)
+
+ Extract the device major number from a raw device number (usually the
+ :attr:`st_dev` or :attr:`st_rdev` field from :c:type:`stat`).
+
+
+.. function:: minor(device)
+
+ Extract the device minor number from a raw device number (usually the
+ :attr:`st_dev` or :attr:`st_rdev` field from :c:type:`stat`).
+
+
+.. function:: makedev(major, minor)
+
+ Compose a raw device number from the major and minor device numbers.
+
+
.. function:: pathconf(path, name)
Return system configuration information relevant to a named file. *name*
@@ -1220,6 +1646,9 @@ Files and Directories
included in ``pathconf_names``, an :exc:`OSError` is raised with
:const:`errno.EINVAL` for the error number.
+ This function can support :ref:`specifying a file descriptor
+ <path_fd>`.
+
Availability: Unix.
@@ -1227,38 +1656,53 @@ Files and Directories
Dictionary mapping names accepted by :func:`pathconf` and :func:`fpathconf` to
the integer values defined for those names by the host operating system. This
- can be used to determine the set of names known to the system. Availability:
- Unix.
+ can be used to determine the set of names known to the system.
+
+ Availability: Unix.
-.. function:: readlink(path)
+.. function:: readlink(path, *, dir_fd=None)
Return a string representing the path to which the symbolic link points. The
- result may be either an absolute or relative pathname; if it is relative, it may
- be converted to an absolute pathname using ``os.path.join(os.path.dirname(path),
- result)``.
+ result may be either an absolute or relative pathname; if it is relative, it
+ may be converted to an absolute pathname using
+ ``os.path.join(os.path.dirname(path), result)``.
If the *path* is a string object, the result will also be a string object,
and the call may raise a UnicodeDecodeError. If the *path* is a bytes
object, the result will be a bytes object.
+ This function can also support :ref:`paths relative to directory descriptors
+ <dir_fd>`.
+
Availability: Unix, Windows.
.. versionchanged:: 3.2
Added support for Windows 6.0 (Vista) symbolic links.
+ .. versionadded:: 3.3
+ The *dir_fd* argument.
+
-.. function:: remove(path)
+.. function:: remove(path, *, dir_fd=None)
Remove (delete) the file *path*. If *path* is a directory, :exc:`OSError` is
- raised; see :func:`rmdir` below to remove a directory. This is identical to
- the :func:`unlink` function documented below. On Windows, attempting to
- remove a file that is in use causes an exception to be raised; on Unix, the
- directory entry is removed but the storage allocated to the file is not made
- available until the original file is no longer in use.
+ raised. Use :func:`rmdir` to remove directories.
+
+ This function can support :ref:`paths relative to directory descriptors
+ <dir_fd>`.
+
+ On Windows, attempting to remove a file that is in use causes an exception to
+ be raised; on Unix, the directory entry is removed but the storage allocated
+ to the file is not made available until the original file is no longer in use.
+
+ This function is identical to :func:`unlink`.
Availability: Unix, Windows.
+ .. versionadded:: 3.3
+ The *dir_fd* argument.
+
.. function:: removedirs(path)
@@ -1274,7 +1718,7 @@ Files and Directories
successfully removed.
-.. function:: rename(src, dst)
+.. function:: rename(src, dst, *, src_dir_fd=None, dst_dir_fd=None)
Rename the file or directory *src* to *dst*. If *dst* is a directory,
:exc:`OSError` will be raised. On Unix, if *dst* exists and is a file, it will
@@ -1282,11 +1726,18 @@ Files and Directories
Unix flavors if *src* and *dst* are on different filesystems. If successful,
the renaming will be an atomic operation (this is a POSIX requirement). On
Windows, if *dst* already exists, :exc:`OSError` will be raised even if it is a
- file; there may be no way to implement an atomic rename when *dst* names an
- existing file.
+ file.
+
+ This function can support specifying *src_dir_fd* and/or *dst_dir_fd* to
+ supply :ref:`paths relative to directory descriptors <dir_fd>`.
+
+ If you want cross-platform overwriting of the destination, use :func:`replace`.
Availability: Unix, Windows.
+ .. versionadded:: 3.3
+ The *src_dir_fd* and *dst_dir_fd* arguments.
+
.. function:: renames(old, new)
@@ -1301,22 +1752,46 @@ Files and Directories
permissions needed to remove the leaf directory or file.
-.. function:: rmdir(path)
+.. function:: replace(src, dst, *, src_dir_fd=None, dst_dir_fd=None)
+
+ Rename the file or directory *src* to *dst*. If *dst* is a directory,
+ :exc:`OSError` will be raised. If *dst* exists and is a file, it will
+ be replaced silently if the user has permission. The operation may fail
+ if *src* and *dst* are on different filesystems. If successful,
+ the renaming will be an atomic operation (this is a POSIX requirement).
+
+ This function can support specifying *src_dir_fd* and/or *dst_dir_fd* to
+ supply :ref:`paths relative to directory descriptors <dir_fd>`.
+
+ Availability: Unix, Windows.
+
+ .. versionadded:: 3.3
+
+
+.. function:: rmdir(path, *, dir_fd=None)
Remove (delete) the directory *path*. Only works when the directory is
empty, otherwise, :exc:`OSError` is raised. In order to remove whole
directory trees, :func:`shutil.rmtree` can be used.
+ This function can support :ref:`paths relative to directory descriptors
+ <dir_fd>`.
+
Availability: Unix, Windows.
+ .. versionadded:: 3.3
+ The *dir_fd* parameter.
-.. function:: stat(path)
+
+.. function:: stat(path, *, dir_fd=None, follow_symlinks=True)
Perform the equivalent of a :c:func:`stat` system call on the given path.
- (This function follows symlinks; to stat a symlink use :func:`lstat`.)
+ *path* may be specified as either a string or as an open file descriptor.
+ (This function normally follows symlinks; to stat a symlink add the argument
+ ``follow_symlinks=False``, or use :func:`lstat`.)
- The return value is an object whose attributes correspond to the members
- of the :c:type:`stat` structure, namely:
+ The return value is an object whose attributes correspond roughly
+ to the members of the :c:type:`stat` structure, namely:
* :attr:`st_mode` - protection bits,
* :attr:`st_ino` - inode number,
@@ -1325,10 +1800,18 @@ Files and Directories
* :attr:`st_uid` - user id of owner,
* :attr:`st_gid` - group id of owner,
* :attr:`st_size` - size of file, in bytes,
- * :attr:`st_atime` - time of most recent access,
- * :attr:`st_mtime` - time of most recent content modification,
- * :attr:`st_ctime` - platform dependent; time of most recent metadata change on
- Unix, or the time of creation on Windows)
+ * :attr:`st_atime` - time of most recent access expressed in seconds,
+ * :attr:`st_mtime` - time of most recent content modification
+ expressed in seconds,
+ * :attr:`st_ctime` - platform dependent; time of most recent metadata
+ change on Unix, or the time of creation on Windows, expressed in seconds
+ * :attr:`st_atime_ns` - time of most recent access
+ expressed in nanoseconds as an integer,
+ * :attr:`st_mtime_ns` - time of most recent content modification
+ expressed in nanoseconds as an integer,
+ * :attr:`st_ctime_ns` - platform dependent; time of most recent metadata
+ change on Unix, or the time of creation on Windows,
+ expressed in nanoseconds as an integer
On some Unix systems (such as Linux), the following attributes may also be
available:
@@ -1358,13 +1841,25 @@ Files and Directories
or FAT32 file systems, :attr:`st_mtime` has 2-second resolution, and
:attr:`st_atime` has only 1-day resolution. See your operating system
documentation for details.
-
- For backward compatibility, the return value of :func:`~os.stat` is also accessible
- as a tuple of at least 10 integers giving the most important (and portable)
- members of the :c:type:`stat` structure, in the order :attr:`st_mode`,
- :attr:`st_ino`, :attr:`st_dev`, :attr:`st_nlink`, :attr:`st_uid`,
- :attr:`st_gid`, :attr:`st_size`, :attr:`st_atime`, :attr:`st_mtime`,
- :attr:`st_ctime`. More items may be added at the end by some implementations.
+ Similarly, although :attr:`st_atime_ns`, :attr:`st_mtime_ns`,
+ and :attr:`st_ctime_ns` are always expressed in nanoseconds, many
+ systems do not provide nanosecond precision. On systems that do
+ provide nanosecond precision, the floating-point object used to
+ store :attr:`st_atime`, :attr:`st_mtime`, and :attr:`st_ctime`
+ cannot preserve all of it, and as such will be slightly inexact.
+ If you need the exact timestamps you should always use
+ :attr:`st_atime_ns`, :attr:`st_mtime_ns`, and :attr:`st_ctime_ns`.
+
+ For backward compatibility, the return value of :func:`~os.stat` is also
+ accessible as a tuple of at least 10 integers giving the most important (and
+ portable) members of the :c:type:`stat` structure, in the order
+ :attr:`st_mode`, :attr:`st_ino`, :attr:`st_dev`, :attr:`st_nlink`,
+ :attr:`st_uid`, :attr:`st_gid`, :attr:`st_size`, :attr:`st_atime`,
+ :attr:`st_mtime`, :attr:`st_ctime`. More items may be added at the end by
+ some implementations.
+
+ This function can support :ref:`specifying a file descriptor <path_fd>`,
+ :ref:`paths relative to directory descriptors <dir_fd>` and :ref:`not
+ following symlinks <follow_symlinks>`.
.. index:: module: stat
@@ -1385,6 +1880,12 @@ Files and Directories
Availability: Unix, Windows.
+ .. versionadded:: 3.3
+ Added the *dir_fd* and *follow_symlinks* arguments,
+ specifying a file descriptor instead of a path,
+ and the :attr:`st_atime_ns`, :attr:`st_mtime_ns`,
+ and :attr:`st_ctime_ns` members.
+
.. function:: stat_float_times([newvalue])
@@ -1410,6 +1911,8 @@ Files and Directories
are processed, this application should turn the feature off until the library
has been corrected.
+ .. deprecated:: 3.3
+
.. function:: statvfs(path)
@@ -1425,34 +1928,118 @@ Files and Directories
read-only, and if :const:`ST_NOSUID` is set, the semantics of
setuid/setgid bits are disabled or not supported.
+ This function can support :ref:`specifying a file descriptor <path_fd>`.
+
.. versionchanged:: 3.2
The :const:`ST_RDONLY` and :const:`ST_NOSUID` constants were added.
Availability: Unix.
+ .. versionadded:: 3.3
+ Added support for specifying an open file descriptor for *path*.
-.. function:: symlink(source, link_name)
- symlink(source, link_name, target_is_directory=False)
- Create a symbolic link pointing to *source* named *link_name*.
+.. data:: supports_dir_fd
+
+ A :class:`~collections.Set` object indicating which functions in the
+ :mod:`os` module permit use of their *dir_fd* parameter. Different platforms
+ provide different functionality, and an option that might work on one might
+ be unsupported on another. For consistency's sake, functions that support
+ *dir_fd* always allow specifying the parameter, but will throw an exception
+ if the functionality is not actually available.
+
+ To check whether a particular function permits use of its *dir_fd*
+ parameter, use the ``in`` operator on ``supports_dir_fd``. As an example,
+ this expression determines whether the *dir_fd* parameter of :func:`os.stat`
+ is locally available::
+
+ os.stat in os.supports_dir_fd
+
+ Currently *dir_fd* parameters only work on Unix platforms; none of them work
+ on Windows.
+
+ .. versionadded:: 3.3
+
+
+.. data:: supports_effective_ids
+
+ A :class:`~collections.Set` object indicating which functions in the
+ :mod:`os` module permit use of the *effective_ids* parameter for
+ :func:`os.access`. If the local platform supports it, the collection will
+ contain :func:`os.access`, otherwise it will be empty.
+
+ To check whether you can use the *effective_ids* parameter for
+ :func:`os.access`, use the ``in`` operator on ``supports_effective_ids``, like so::
+
+ os.access in os.supports_effective_ids
+
+ Currently *effective_ids* only works on Unix platforms; it does not work on
+ Windows.
+
+ .. versionadded:: 3.3
+
+
+.. data:: supports_fd
+
+ A :class:`~collections.Set` object indicating which functions in the
+ :mod:`os` module permit specifying their *path* parameter as an open file
+ descriptor. Different platforms provide different functionality, and an
+ option that might work on one might be unsupported on another. For
+ consistency's sake, functions that support *fd* always allow specifying
+ the parameter, but will throw an exception if the functionality is not
+ actually available.
- On Windows, symlink version takes an additional optional parameter,
- *target_is_directory*, which defaults to ``False``.
+ To check whether a particular function permits specifying an open file
+ descriptor for its *path* parameter, use the ``in`` operator on
+ ``supports_fd``. As an example, this expression determines whether
+ :func:`os.chdir` accepts open file descriptors when called on your local
+ platform::
- On Windows, a symlink represents a file or a directory, and does not morph to
- the target dynamically. If *target_is_directory* is set to ``True``, the
- symlink will be created as a directory symlink, otherwise as a file symlink
- (the default).
+ os.chdir in os.supports_fd
+
+ .. versionadded:: 3.3
+
+
+.. data:: supports_follow_symlinks
+
+ A :class:`~collections.Set` object indicating which functions in the
+ :mod:`os` module permit use of their *follow_symlinks* parameter. Different
+ platforms provide different functionality, and an option that might work on
+ one might be unsupported on another. For consistency's sake, functions that
+ support *follow_symlinks* always allow specifying the parameter, but will
+ throw an exception if the functionality is not actually available.
+
+ To check whether a particular function permits use of its *follow_symlinks*
+ parameter, use the ``in`` operator on ``supports_follow_symlinks``. As an
+ example, this expression determines whether the *follow_symlinks* parameter
+ of :func:`os.stat` is locally available::
+
+ os.stat in os.supports_follow_symlinks
+
+ .. versionadded:: 3.3
+
+
+.. function:: symlink(source, link_name, target_is_directory=False, *, dir_fd=None)
+
+ Create a symbolic link pointing to *source* named *link_name*.
+
+ On Windows, a symlink represents either a file or a directory, and does not
+ morph to the target dynamically. If *target_is_directory* is set to ``True``,
+ the symlink will be created as a directory symlink, otherwise as a file symlink
+ (the default). On non-Windows platforms, *target_is_directory* is ignored.
Symbolic link support was introduced in Windows 6.0 (Vista). :func:`symlink`
will raise a :exc:`NotImplementedError` on Windows versions earlier than 6.0.
+ This function can support :ref:`paths relative to directory descriptors
+ <dir_fd>`.
+
.. note::
- The *SeCreateSymbolicLinkPrivilege* is required in order to successfully
- create symlinks. This privilege is not typically granted to regular
- users but is available to accounts which can escalate privileges to the
- administrator level. Either obtaining the privilege or running your
+ On Windows, the *SeCreateSymbolicLinkPrivilege* is required in order to
+ successfully create symlinks. This privilege is not typically granted to
+ regular users but is available to accounts which can escalate privileges
+ to the administrator level. Either obtaining the privilege or running your
application as an administrator are ways to successfully create symlinks.
:exc:`OSError` is raised when the function is called by an unprivileged
@@ -1463,31 +2050,85 @@ Files and Directories
.. versionchanged:: 3.2
Added support for Windows 6.0 (Vista) symbolic links.
+ .. versionadded:: 3.3
+ Added the *dir_fd* argument, and now allow *target_is_directory*
+ on non-Windows platforms.
+
+
+.. function:: sync()
+
+ Force write of everything to disk.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
-.. function:: unlink(path)
- Remove (delete) the file *path*. This is the same function as
- :func:`remove`; the :func:`unlink` name is its traditional Unix
- name.
+.. function:: truncate(path, length)
+
+ Truncate the file corresponding to *path*, so that it is at most
+ *length* bytes in size.
+
+ This function can support :ref:`specifying a file descriptor <path_fd>`.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: unlink(path, *, dir_fd=None)
+
+ Remove (delete) the file *path*. This function is identical to
+ :func:`remove`; the ``unlink`` name is its traditional Unix
+ name. Please see the documentation for :func:`remove` for
+ further information.
Availability: Unix, Windows.
+ .. versionadded:: 3.3
+ The *dir_fd* parameter.
+
-.. function:: utime(path, times)
+.. function:: utime(path, times=None, *, ns=None, dir_fd=None, follow_symlinks=True)
- Set the access and modified times of the file specified by *path*. If *times*
- is ``None``, then the file's access and modified times are set to the current
- time. (The effect is similar to running the Unix program :program:`touch` on
- the path.) Otherwise, *times* must be a 2-tuple of numbers, of the form
- ``(atime, mtime)`` which is used to set the access and modified times,
- respectively. Whether a directory can be given for *path* depends on whether
- the operating system implements directories as files (for example, Windows
- does not). Note that the exact times you set here may not be returned by a
- subsequent :func:`~os.stat` call, depending on the resolution with which your
- operating system records access and modification times; see :func:`~os.stat`.
+ Set the access and modified times of the file specified by *path*.
+
+ :func:`utime` takes two optional parameters, *times* and *ns*.
+ These specify the times set on *path* and are used as follows:
+
+ - If *ns* is not ``None``,
+ it must be a 2-tuple of the form ``(atime_ns, mtime_ns)``
+ where each member is an int expressing nanoseconds.
+ - If *times* is not ``None``,
+ it must be a 2-tuple of the form ``(atime, mtime)``
+ where each member is an int or float expressing seconds.
+ - If *times* and *ns* are both ``None``,
+ this is equivalent to specifying ``ns=(atime_ns, mtime_ns)``
+ where both times are the current time.
+ (The effect is similar to running the Unix program
+ :program:`touch` on *path*.)
+
+ It is an error to specify tuples for both *times* and *ns*.
+
+ Whether a directory can be given for *path*
+ depends on whether the operating system implements directories as files
+ (for example, Windows does not). Note that the exact times you set here may
+ not be returned by a subsequent :func:`~os.stat` call, depending on the
+ resolution with which your operating system records access and modification
+ times; see :func:`~os.stat`. The best way to preserve exact times is to
+ use the *st_atime_ns* and *st_mtime_ns* fields from the :func:`os.stat`
+ result object with the *ns* parameter to :func:`utime`.
+
+ This function can support :ref:`specifying a file descriptor <path_fd>`,
+ :ref:`paths relative to directory descriptors <dir_fd>` and :ref:`not
+ following symlinks <follow_symlinks>`.
Availability: Unix, Windows.
+ .. versionadded:: 3.3
+ Added support for specifying an open file descriptor for *path*,
+ and the *dir_fd*, *follow_symlinks*, and *ns* parameters.
+
.. function:: walk(top, topdown=True, onerror=None, followlinks=False)
@@ -1534,9 +2175,9 @@ Files and Directories
.. note::
- Be aware that setting *followlinks* to ``True`` can lead to infinite recursion if a
- link points to a parent directory of itself. :func:`walk` does not keep track of
- the directories it visited already.
+ Be aware that setting *followlinks* to ``True`` can lead to infinite
+ recursion if a link points to a parent directory of itself. :func:`walk`
+ does not keep track of the directories it visited already.
.. note::
@@ -1572,6 +2213,137 @@ Files and Directories
os.rmdir(os.path.join(root, name))
+.. function:: fwalk(top='.', topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=None)
+
+ .. index::
+ single: directory; walking
+ single: directory; traversal
+
+ This behaves exactly like :func:`walk`, except that it yields a 4-tuple
+ ``(dirpath, dirnames, filenames, dirfd)``, and it supports ``dir_fd``.
+
+ *dirpath*, *dirnames* and *filenames* are identical to :func:`walk` output,
+ and *dirfd* is a file descriptor referring to the directory *dirpath*.
+
+ This function always supports :ref:`paths relative to directory descriptors
+ <dir_fd>` and :ref:`not following symlinks <follow_symlinks>`. Note however
+ that, unlike other functions, the :func:`fwalk` default value for
+ *follow_symlinks* is ``False``.
+
+ .. note::
+
+ Since :func:`fwalk` yields file descriptors, those are only valid until
+ the next iteration step, so you should duplicate them (e.g. with
+ :func:`dup`) if you want to keep them longer.
+
+ This example displays the number of bytes taken by non-directory files in each
+ directory under the starting directory, except that it doesn't look under any
+ CVS subdirectory::
+
+ import os
+ for root, dirs, files, rootfd in os.fwalk('python/Lib/email'):
+ print(root, "consumes", end="")
+ print(sum([os.stat(name, dir_fd=rootfd).st_size for name in files]),
+ end="")
+ print("bytes in", len(files), "non-directory files")
+ if 'CVS' in dirs:
+ dirs.remove('CVS') # don't visit CVS directories
+
+ In the next example, walking the tree bottom-up is essential:
+ :func:`rmdir` doesn't allow deleting a directory before the directory is
+ empty::
+
+ # Delete everything reachable from the directory named in "top",
+ # assuming there are no symbolic links.
+ # CAUTION: This is dangerous! For example, if top == '/', it
+ # could delete all your disk files.
+ import os
+ for root, dirs, files, rootfd in os.fwalk(top, topdown=False):
+ for name in files:
+ os.unlink(name, dir_fd=rootfd)
+ for name in dirs:
+ os.rmdir(name, dir_fd=rootfd)
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+Linux extended attributes
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 3.3
+
+These functions are all available on Linux only.
+
+.. function:: getxattr(path, attribute, *, follow_symlinks=True)
+
+ Return the value of the extended filesystem attribute *attribute* for
+ *path*. *attribute* can be bytes or str. If it is str, it is encoded
+ with the filesystem encoding.
+
+ This function can support :ref:`specifying a file descriptor <path_fd>` and
+ :ref:`not following symlinks <follow_symlinks>`.
+
+
+.. function:: listxattr(path=None, *, follow_symlinks=True)
+
+ Return a list of the extended filesystem attributes on *path*. The
+ attributes in the list are represented as strings decoded with the filesystem
+ encoding. If *path* is ``None``, :func:`listxattr` will examine the current
+ directory.
+
+ This function can support :ref:`specifying a file descriptor <path_fd>` and
+ :ref:`not following symlinks <follow_symlinks>`.
+
+
+.. function:: removexattr(path, attribute, *, follow_symlinks=True)
+
+ Removes the extended filesystem attribute *attribute* from *path*.
+ *attribute* should be bytes or str. If it is a string, it is encoded
+ with the filesystem encoding.
+
+ This function can support :ref:`specifying a file descriptor <path_fd>` and
+ :ref:`not following symlinks <follow_symlinks>`.
+
+
+.. function:: setxattr(path, attribute, value, flags=0, *, follow_symlinks=True)
+
+ Set the extended filesystem attribute *attribute* on *path* to *value*.
+ *attribute* must be a bytes or str with no embedded NULs. If it is a str,
+ it is encoded with the filesystem encoding. *flags* may be
+ :data:`XATTR_REPLACE` or :data:`XATTR_CREATE`. If :data:`XATTR_REPLACE` is
+ given and the attribute does not exist, ``ENODATA`` will be raised.
+ If :data:`XATTR_CREATE` is given and the attribute already exists, the
+ attribute will not be created and ``EEXIST`` will be raised.
+
+ This function can support :ref:`specifying a file descriptor <path_fd>` and
+ :ref:`not following symlinks <follow_symlinks>`.
+
+ .. note::
+
+ A bug in Linux kernel versions less than 2.6.39 caused the flags argument
+ to be ignored on some filesystems.
+
+
+.. data:: XATTR_SIZE_MAX
+
+ The maximum size the value of an extended attribute can be. Currently, this
+ is 64 kilobytes on Linux.
+
+
+.. data:: XATTR_CREATE
+
+ This is a possible value for the flags argument in :func:`setxattr`. It
+ indicates the operation must create an attribute.
+
+
+.. data:: XATTR_REPLACE
+
+ This is a possible value for the flags argument in :func:`setxattr`. It
+ indicates the operation must replace an existing attribute.
+
+
.. _os-process:
Process Management
@@ -1645,8 +2417,16 @@ to be ignored.
:func:`execlp`, :func:`execv`, and :func:`execvp` all cause the new process to
inherit the environment of the current process.
+ For :func:`execve` on some platforms, *path* may also be specified as an open
+ file descriptor. This functionality may not be supported on your platform;
+ you can check whether or not it is available using :data:`os.supports_fd`.
+ If it is unavailable, using it will raise a :exc:`NotImplementedError`.
+
Availability: Unix, Windows.
+ .. versionadded:: 3.3
+ Added support for specifying an open file descriptor for *path*
+ for :func:`execve`.
.. function:: _exit(n)
@@ -1836,6 +2616,8 @@ written in Python, such as a mail server's external command delivery program.
will be set to *sig*. The Windows version of :func:`kill` additionally takes
process handles to be killed.
+ See also :func:`signal.pthread_kill`.
+
.. versionadded:: 3.2
Windows support.
@@ -2026,14 +2808,30 @@ written in Python, such as a mail server's external command delivery program.
.. function:: times()
- Return a 5-tuple of floating point numbers indicating accumulated (processor
- or other) times, in seconds. The items are: user time, system time,
- children's user time, children's system time, and elapsed real time since a
- fixed point in the past, in that order. See the Unix manual page
+ Returns the current global process times.
+ The return value is an object with five attributes:
+
+ * :attr:`user` - user time
+ * :attr:`system` - system time
+ * :attr:`children_user` - user time of all child processes
+ * :attr:`children_system` - system time of all child processes
+ * :attr:`elapsed` - elapsed real time since a fixed point in the past
+
+ For backwards compatibility, this object also behaves like a five-tuple
+ containing :attr:`user`, :attr:`system`, :attr:`children_user`,
+ :attr:`children_system`, and :attr:`elapsed` in that order.
+
+ See the Unix manual page
:manpage:`times(2)` or the corresponding Windows Platform API documentation.
- On Windows, only the first two items are filled, the others are zero.
+ On Windows, only :attr:`user` and :attr:`system` are known; the other
+ attributes are zero.
+ On OS/2, only :attr:`elapsed` is known; the other attributes are zero.
- Availability: Unix, Windows
+ Availability: Unix, Windows.
+
+ .. versionchanged:: 3.3
+ Return type changed from a tuple to a tuple-like object
+ with named attributes.
.. function:: wait()
@@ -2046,6 +2844,58 @@ written in Python, such as a mail server's external command delivery program.
Availability: Unix.
+.. function:: waitid(idtype, id, options)
+
+ Wait for the completion of one or more child processes.
+ *idtype* can be :data:`P_PID`, :data:`P_PGID` or :data:`P_ALL`.
+ *id* specifies the pid to wait on.
+ *options* is constructed from the ORing of one or more of :data:`WEXITED`,
+ :data:`WSTOPPED` or :data:`WCONTINUED` and additionally may be ORed with
+ :data:`WNOHANG` or :data:`WNOWAIT`. The return value is an object
+ representing the data contained in the :c:type:`siginfo_t` structure, namely:
+ :attr:`si_pid`, :attr:`si_uid`, :attr:`si_signo`, :attr:`si_status`,
+ :attr:`si_code` or ``None`` if :data:`WNOHANG` is specified and there are no
+ children in a waitable state.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+.. data:: P_PID
+ P_PGID
+ P_ALL
+
+ These are the possible values for *idtype* in :func:`waitid`. They affect
+ how *id* is interpreted.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+.. data:: WEXITED
+ WSTOPPED
+ WNOWAIT
+
+ Flags that can be used in *options* in :func:`waitid` that specify what
+ child signal to wait for.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. data:: CLD_EXITED
+ CLD_DUMPED
+ CLD_TRAPPED
+ CLD_CONTINUED
+
+ These are the possible values for :attr:`si_code` in the result returned by
+ :func:`waitid`.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
.. function:: waitpid(pid, options)
@@ -2110,7 +2960,7 @@ written in Python, such as a mail server's external command delivery program.
This option causes child processes to be reported if they have been continued
from a job control stop since their status was last reported.
- Availability: Some Unix systems.
+ Availability: some Unix systems.
.. data:: WUNTRACED
@@ -2187,6 +3037,129 @@ used to determine the disposition of a process.
Availability: Unix.
+Interface to the scheduler
+--------------------------
+
+These functions control how a process is allocated CPU time by the operating
+system. They are only available on some Unix platforms. For more detailed
+information, consult your Unix manpages.
+
+.. versionadded:: 3.3
+
+The following scheduling policies are exposed if they are supported by the
+operating system.
+
+.. data:: SCHED_OTHER
+
+ The default scheduling policy.
+
+.. data:: SCHED_BATCH
+
+ Scheduling policy for CPU-intensive processes that tries to preserve
+ interactivity on the rest of the computer.
+
+.. data:: SCHED_IDLE
+
+ Scheduling policy for extremely low priority background tasks.
+
+.. data:: SCHED_SPORADIC
+
+ Scheduling policy for sporadic server programs.
+
+.. data:: SCHED_FIFO
+
+ A First In First Out scheduling policy.
+
+.. data:: SCHED_RR
+
+ A round-robin scheduling policy.
+
+.. data:: SCHED_RESET_ON_FORK
+
+ This flag can be OR'ed with any other scheduling policy. When a process with
+ this flag set forks, its child's scheduling policy and priority are reset to
+ the default.
+
+
+.. class:: sched_param(sched_priority)
+
+ This class represents tunable scheduling parameters used in
+ :func:`sched_setparam`, :func:`sched_setscheduler`, and
+ :func:`sched_getparam`. It is immutable.
+
+ At the moment, there is only one possible parameter:
+
+ .. attribute:: sched_priority
+
+ The scheduling priority for a scheduling policy.
+
+
+.. function:: sched_get_priority_min(policy)
+
+ Get the minimum priority value for *policy*. *policy* is one of the
+ scheduling policy constants above.
+
+
+.. function:: sched_get_priority_max(policy)
+
+ Get the maximum priority value for *policy*. *policy* is one of the
+ scheduling policy constants above.
+
+
+.. function:: sched_setscheduler(pid, policy, param)
+
+ Set the scheduling policy for the process with PID *pid*. A *pid* of 0 means
+ the calling process. *policy* is one of the scheduling policy constants
+ above. *param* is a :class:`sched_param` instance.
+
+
+.. function:: sched_getscheduler(pid)
+
+ Return the scheduling policy for the process with PID *pid*. A *pid* of 0
+ means the calling process. The result is one of the scheduling policy
+ constants above.
+
+
+.. function:: sched_setparam(pid, param)
+
+ Set the scheduling parameters for the process with PID *pid*. A *pid* of 0 means
+ the calling process. *param* is a :class:`sched_param` instance.
+
+
+.. function:: sched_getparam(pid)
+
+ Return the scheduling parameters as a :class:`sched_param` instance for the
+ process with PID *pid*. A *pid* of 0 means the calling process.
+
+
+.. function:: sched_rr_get_interval(pid)
+
+ Return the round-robin quantum in seconds for the process with PID *pid*. A
+ *pid* of 0 means the calling process.
+
+
+.. function:: sched_yield()
+
+ Voluntarily relinquish the CPU.
+
+
+.. function:: sched_setaffinity(pid, mask)
+
+ Restrict the process with PID *pid* (or the current process if zero) to a
+ set of CPUs. *mask* is an iterable of integers representing the set of
+ CPUs to which the process should be restricted.
+
+
+.. function:: sched_getaffinity(pid)
+
+ Return the set of CPUs the process with PID *pid* (or the current process
+ if zero) is restricted to.
+
+ .. seealso::
+ :func:`multiprocessing.cpu_count` returns the number of CPUs in the
+ system.
+
+
.. _os-path:
Miscellaneous System Information
@@ -2211,7 +3184,7 @@ Miscellaneous System Information
included in ``confstr_names``, an :exc:`OSError` is raised with
:const:`errno.EINVAL` for the error number.
- Availability: Unix
+ Availability: Unix.
.. data:: confstr_names
@@ -2333,6 +3306,9 @@ Miscellaneous Functions
This function returns random bytes from an OS-specific randomness source. The
returned data should be unpredictable enough for cryptographic applications,
- though its exact quality depends on the OS implementation. On a UNIX-like
+ though its exact quality depends on the OS implementation. On a Unix-like
system this will query /dev/urandom, and on Windows it will use CryptGenRandom.
If a randomness source is not found, :exc:`NotImplementedError` will be raised.
+
+ For an easy-to-use interface to the random number generator
+ provided by your platform, please see :class:`random.SystemRandom`.
diff --git a/Doc/library/ossaudiodev.rst b/Doc/library/ossaudiodev.rst
index ed84413..3002700 100644
--- a/Doc/library/ossaudiodev.rst
+++ b/Doc/library/ossaudiodev.rst
@@ -38,6 +38,10 @@ the standard audio interface for Linux and recent versions of FreeBSD.
This probably all warrants a footnote or two, but I don't understand
things well enough right now to write it! --GPW
+.. versionchanged:: 3.3
+ Operations in this module now raise :exc:`OSError` where :exc:`IOError`
+ was raised.
+
.. seealso::
@@ -56,7 +60,7 @@ the standard audio interface for Linux and recent versions of FreeBSD.
what went wrong.
(If :mod:`ossaudiodev` receives an error from a system call such as
- :c:func:`open`, :c:func:`write`, or :c:func:`ioctl`, it raises :exc:`IOError`.
+ :c:func:`open`, :c:func:`write`, or :c:func:`ioctl`, it raises :exc:`OSError`.
Errors detected directly by :mod:`ossaudiodev` result in :exc:`OSSAudioError`.)
(For backwards compatibility, the exception class is also available as
@@ -169,7 +173,7 @@ The following methods each map to exactly one :func:`ioctl` system call. The
correspondence is obvious: for example, :meth:`setfmt` corresponds to the
``SNDCTL_DSP_SETFMT`` ioctl, and :meth:`sync` to ``SNDCTL_DSP_SYNC`` (this can
be useful when consulting the OSS documentation). If the underlying
-:func:`ioctl` fails, they all raise :exc:`IOError`.
+:func:`ioctl` fails, they all raise :exc:`OSError`.
.. method:: oss_audio_device.nonblock()
@@ -345,7 +349,7 @@ The mixer object provides two file-like methods:
.. method:: oss_mixer_device.close()
This method closes the open mixer device file. Any further attempts to use the
- mixer after this file is closed will raise an :exc:`IOError`.
+ mixer after this file is closed will raise an :exc:`OSError`.
.. method:: oss_mixer_device.fileno()
@@ -404,7 +408,7 @@ The remaining methods are specific to audio mixing:
returned, but both volumes are the same.
Raises :exc:`OSSAudioError` if an invalid control is specified, or
- :exc:`IOError` if an unsupported control is specified.
+ :exc:`OSError` if an unsupported control is specified.
.. method:: oss_mixer_device.set(control, (left, right))
@@ -428,7 +432,7 @@ The remaining methods are specific to audio mixing:
.. method:: oss_mixer_device.set_recsrc(bitmask)
Call this function to specify a recording source. Returns a bitmask indicating
- the new recording source (or sources) if successful; raises :exc:`IOError` if an
+ the new recording source (or sources) if successful; raises :exc:`OSError` if an
invalid source was specified. To set the current recording source to the
microphone input::
diff --git a/Doc/library/pdb.rst b/Doc/library/pdb.rst
index 1e9de63..f4e37ac 100644
--- a/Doc/library/pdb.rst
+++ b/Doc/library/pdb.rst
@@ -38,6 +38,11 @@ of the debugger is::
> <string>(1)?()
(Pdb)
+.. versionchanged:: 3.3
+ Tab-completion via the :mod:`readline` module is available for commands and
+ command arguments, e.g. the current global and local names are offered as
+ arguments of the ``print`` command.
+
:file:`pdb.py` can also be invoked as a script to debug other scripts. For
example::
diff --git a/Doc/library/pickle.rst b/Doc/library/pickle.rst
index a1f9af2..287a08b 100644
--- a/Doc/library/pickle.rst
+++ b/Doc/library/pickle.rst
@@ -287,6 +287,29 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and
See :ref:`pickle-persistent` for details and examples of uses.
+ .. attribute:: dispatch_table
+
+ A pickler object's dispatch table is a registry of *reduction
+ functions* of the kind which can be declared using
+ :func:`copyreg.pickle`. It is a mapping whose keys are classes
+ and whose values are reduction functions. A reduction function
+ takes a single argument of the associated class and should
+ conform to the same interface as a :meth:`~object.__reduce__`
+ method.
+
+ By default, a pickler object will not have a
+ :attr:`dispatch_table` attribute, and it will instead use the
+ global dispatch table managed by the :mod:`copyreg` module.
+ However, to customize the pickling for a specific pickler object
+ one can set the :attr:`dispatch_table` attribute to a dict-like
+ object. Alternatively, if a subclass of :class:`Pickler` has a
+ :attr:`dispatch_table` attribute then this will be used as the
+ default dispatch table for instances of that class.
+
+ See :ref:`pickle-dispatch` for usage examples.
+
+ .. versionadded:: 3.3
+
.. attribute:: fast
Deprecated. Enable fast mode if set to a true value. The fast mode
@@ -577,6 +600,44 @@ pickle external objects by reference.
.. literalinclude:: ../includes/dbpickle.py
+.. _pickle-dispatch:
+
+Dispatch Tables
+^^^^^^^^^^^^^^^
+
+If one wants to customize pickling of some classes without disturbing
+any other code which depends on pickling, then one can create a
+pickler with a private dispatch table.
+
+The global dispatch table managed by the :mod:`copyreg` module is
+available as :data:`copyreg.dispatch_table`. Therefore, one may
+choose to use a modified copy of :data:`copyreg.dispatch_table` as a
+private dispatch table.
+
+For example ::
+
+ f = io.BytesIO()
+ p = pickle.Pickler(f)
+ p.dispatch_table = copyreg.dispatch_table.copy()
+ p.dispatch_table[SomeClass] = reduce_SomeClass
+
+creates an instance of :class:`pickle.Pickler` with a private dispatch
+table which handles the ``SomeClass`` class specially. Alternatively,
+the code ::
+
+ class MyPickler(pickle.Pickler):
+ dispatch_table = copyreg.dispatch_table.copy()
+ dispatch_table[SomeClass] = reduce_SomeClass
+ f = io.BytesIO()
+ p = MyPickler(f)
+
+does the same, but all instances of ``MyPickler`` will by default
+share the same dispatch table. The equivalent code using the
+:mod:`copyreg` module is ::
+
+ copyreg.pickle(SomeClass, reduce_SomeClass)
+ f = io.BytesIO()
+ p = pickle.Pickler(f)
.. _pickle-state:
diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst
index 3118ff2..22d44eb 100644
--- a/Doc/library/pkgutil.rst
+++ b/Doc/library/pkgutil.rst
@@ -56,21 +56,32 @@ support.
Note that :class:`ImpImporter` does not currently support being used by
placement on :data:`sys.meta_path`.
+ .. deprecated:: 3.3
+ This emulation is no longer needed, as the standard import mechanism
+ is now fully PEP 302 compliant and available in :mod:`importlib`
+
.. class:: ImpLoader(fullname, file, filename, etc)
:pep:`302` Loader that wraps Python's "classic" import algorithm.
+ .. deprecated:: 3.3
+ This emulation is no longer needed, as the standard import mechanism
+ is now fully PEP 302 compliant and available in :mod:`importlib`.
+
.. function:: find_loader(fullname)
- Find a :pep:`302` "loader" object for *fullname*.
+ Retrieve a :pep:`302` module loader for the given *fullname*.
- If *fullname* contains dots, path must be the containing package's
- ``__path__``. Returns ``None`` if the module cannot be found or imported.
- This function uses :func:`iter_importers`, and is thus subject to the same
- limitations regarding platform-specific special import locations such as the
- Windows registry.
+ This is a convenience wrapper around :func:`importlib.find_loader` that
+ sets the *path* argument correctly when searching for submodules, and
+ also ensures parent packages (if any) are imported before searching for
+ submodules.
+
+ .. versionchanged:: 3.3
+ Updated to be based directly on :mod:`importlib` rather than relying
+ on the package internal PEP 302 import emulation.
.. function:: get_importer(path_item)
@@ -80,13 +91,13 @@ support.
The returned importer is cached in :data:`sys.path_importer_cache` if it was
newly created by a path hook.
- If there is no importer, a wrapper around the basic import machinery is
- returned. This wrapper is never inserted into the importer cache (``None``
- is inserted instead).
-
The cache (or part of it) can be cleared manually if a rescan of
:data:`sys.path_hooks` is necessary.
+ .. versionchanged:: 3.3
+ Updated to be based directly on :mod:`importlib` rather than relying
+ on the package internal PEP 302 import emulation.
+
.. function:: get_loader(module_or_name)
@@ -102,46 +113,52 @@ support.
limitations regarding platform-specific special import locations such as the
Windows registry.
+ .. versionchanged:: 3.3
+ Updated to be based directly on :mod:`importlib` rather than relying
+ on the package internal PEP 302 import emulation.
+
.. function:: iter_importers(fullname='')
Yield :pep:`302` importers for the given module name.
- If fullname contains a '.', the importers will be for the package containing
- fullname, otherwise they will be importers for :data:`sys.meta_path`,
- :data:`sys.path`, and Python's "classic" import machinery, in that order. If
- the named module is in a package, that package is imported as a side effect
- of invoking this function.
+ If fullname contains a '.', the importers will be for the package
+ containing fullname, otherwise they will be all registered top level
+ importers (i.e. those on both sys.meta_path and sys.path_hooks).
- Non-:pep:`302` mechanisms (e.g. the Windows registry) used by the standard
- import machinery to find files in alternative locations are partially
- supported, but are searched *after* :data:`sys.path`. Normally, these
- locations are searched *before* :data:`sys.path`, preventing :data:`sys.path`
- entries from shadowing them.
+ If the named module is in a package, that package is imported as a side
+ effect of invoking this function.
- For this to cause a visible difference in behaviour, there must be a module
- or package name that is accessible via both :data:`sys.path` and one of the
- non-:pep:`302` file system mechanisms. In this case, the emulation will find
- the former version, while the builtin import mechanism will find the latter.
+ If no module name is specified, all top level importers are produced.
- Items of the following types can be affected by this discrepancy:
- ``imp.C_EXTENSION``, ``imp.PY_SOURCE``, ``imp.PY_COMPILED``,
- ``imp.PKG_DIRECTORY``.
+ .. versionchanged:: 3.3
+ Updated to be based directly on :mod:`importlib` rather than relying
+ on the package internal PEP 302 import emulation.
.. function:: iter_modules(path=None, prefix='')
- Yields ``(module_loader, name, ispkg)`` for all submodules on *path*, or, if
+ Yields ``(module_finder, name, ispkg)`` for all submodules on *path*, or, if
path is ``None``, all top-level modules on ``sys.path``.
*path* should be either ``None`` or a list of paths to look for modules in.
*prefix* is a string to output on the front of every module name on output.
+ .. note::
+ Only works for a :term:`finder` which defines an ``iter_modules()``
+ method. This interface is non-standard, so the module also provides
+ implementations for :class:`importlib.machinery.FileFinder` and
+ :class:`zipimport.zipimporter`.
+
+ .. versionchanged:: 3.3
+ Updated to be based directly on :mod:`importlib` rather than relying
+ on the package internal PEP 302 import emulation.
+
.. function:: walk_packages(path=None, prefix='', onerror=None)
- Yields ``(module_loader, name, ispkg)`` for all modules recursively on
+ Yields ``(module_finder, name, ispkg)`` for all modules recursively on
*path*, or, if path is ``None``, all accessible modules.
*path* should be either ``None`` or a list of paths to look for modules in.
@@ -166,6 +183,16 @@ support.
# list all submodules of ctypes
walk_packages(ctypes.__path__, ctypes.__name__ + '.')
+ .. note::
+ Only works for a :term:`finder` which defines an ``iter_modules()``
+ method. This interface is non-standard, so the module also provides
+ implementations for :class:`importlib.machinery.FileFinder` and
+ :class:`zipimport.zipimporter`.
+
+ .. versionchanged:: 3.3
+ Updated to be based directly on :mod:`importlib` rather than relying
+ on the package internal PEP 302 import emulation.
+
.. function:: get_data(package, resource)
diff --git a/Doc/library/platform.rst b/Doc/library/platform.rst
index 157ac3a..a6a98f1 100644
--- a/Doc/library/platform.rst
+++ b/Doc/library/platform.rst
@@ -30,8 +30,8 @@ Cross Platform
returned as strings.
Values that cannot be determined are returned as given by the parameter presets.
- If bits is given as ``''``, the :c:func:`sizeof(pointer)` (or
- :c:func:`sizeof(long)` on Python version < 1.5.2) is used as indicator for the
+ If bits is given as ``''``, the ``sizeof(pointer)`` (or
+ ``sizeof(long)`` on Python version < 1.5.2) is used as indicator for the
supported pointer size.
The function relies on the system's :file:`file` command to do the actual work.
@@ -158,14 +158,20 @@ Cross Platform
.. function:: uname()
- Fairly portable uname interface. Returns a tuple of strings ``(system, node,
- release, version, machine, processor)`` identifying the underlying platform.
+ Fairly portable uname interface. Returns a :func:`~collections.namedtuple`
+ containing six attributes: :attr:`system`, :attr:`node`, :attr:`release`,
+ :attr:`version`, :attr:`machine`, and :attr:`processor`.
- Note that unlike the :func:`os.uname` function this also returns possible
- processor information as additional tuple entry.
+ Note that this adds a sixth attribute (:attr:`processor`) not present
+ in the :func:`os.uname` result. Also, the attribute names are different
+ for the first two attributes; :func:`os.uname` names them
+ :attr:`sysname` and :attr:`nodename`.
Entries which cannot be determined are set to ``''``.
+ .. versionchanged:: 3.3
+ Result changed from a tuple to a namedtuple.
+
Java Platform
-------------
@@ -214,6 +220,10 @@ Win95/98 specific
preferring :func:`win32pipe.popen`. On Windows NT, :func:`win32pipe.popen`
should work; on Windows 9x it hangs due to bugs in the MS C library.
+ .. deprecated:: 3.3
+ This function is obsolete. Use the :mod:`subprocess` module. Check
+ especially the :ref:`subprocess-replacements` section.
+
Mac OS Platform
---------------
diff --git a/Doc/library/random.rst b/Doc/library/random.rst
index 1cd4d26..55c9d70 100644
--- a/Doc/library/random.rst
+++ b/Doc/library/random.rst
@@ -43,6 +43,12 @@ The :mod:`random` module also provides the :class:`SystemRandom` class which
uses the system function :func:`os.urandom` to generate random numbers
from sources provided by the operating system.
+.. warning::
+
+ The generators of the :mod:`random` module should not be used for security
+ purposes. Use :func:`ssl.RAND_bytes` if you require a cryptographically
+ secure pseudorandom number generator.
+
Bookkeeping functions:
@@ -145,6 +151,9 @@ Functions for sequences:
argument. This is especially fast and space efficient for sampling from a large
population: ``sample(range(10000000), 60)``.
+ If the sample size is larger than the population size, a :exc:`ValueError`
+ is raised.
+
The following functions generate specific real-valued distributions. Function
parameters are named after the corresponding variables in the distribution's
equation, as used in common mathematical practice; most of these equations can
diff --git a/Doc/library/re.rst b/Doc/library/re.rst
index 8397aad..07623c9 100644
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -414,17 +414,24 @@ Most of the standard escapes supported by Python string literals are also
accepted by the regular expression parser::
\a \b \f \n
- \r \t \v \x
- \\
+ \r \t \u \U
+ \v \x \\
(Note that ``\b`` is used to represent word boundaries, and means "backspace"
only inside character classes.)
+``'\u'`` and ``'\U'`` escape sequences are only recognized in Unicode
+patterns. In bytes patterns they are not treated specially.
+
Octal escapes are included in a limited form. If the first digit is a 0, or if
there are three octal digits, it is considered an octal escape. Otherwise, it is
a group reference. As for string literals, octal escapes are always at most
three digits in length.
+.. versionchanged:: 3.3
+ The ``'\u'`` and ``'\U'`` escape sequences have been added.
+
+
.. _contents-of-module-re:
@@ -684,9 +691,12 @@ form.
.. function:: escape(string)
- Return *string* with all non-alphanumerics backslashed; this is useful if you
- want to match an arbitrary literal string that may have regular expression
- metacharacters in it.
+ Escape all the characters in *string* except ASCII letters, numbers and ``'_'``.
+ This is useful if you want to match an arbitrary literal string that may
+ have regular expression metacharacters in it.
+
+ .. versionchanged:: 3.3
+ The ``'_'`` character is no longer escaped.
.. function:: purge()
diff --git a/Doc/library/readline.rst b/Doc/library/readline.rst
index ab55197..1134619 100644
--- a/Doc/library/readline.rst
+++ b/Doc/library/readline.rst
@@ -199,7 +199,7 @@ normally be executed automatically during interactive sessions from the user's
histfile = os.path.join(os.path.expanduser("~"), ".pyhist")
try:
readline.read_history_file(histfile)
- except IOError:
+ except FileNotFoundError:
pass
import atexit
atexit.register(readline.write_history_file, histfile)
@@ -224,7 +224,7 @@ support history save/restore. ::
if hasattr(readline, "read_history_file"):
try:
readline.read_history_file(histfile)
- except IOError:
+ except FileNotFoundError:
pass
atexit.register(self.save_history, histfile)
diff --git a/Doc/library/resource.rst b/Doc/library/resource.rst
index c16b013..03a7cb5 100644
--- a/Doc/library/resource.rst
+++ b/Doc/library/resource.rst
@@ -14,13 +14,15 @@ resources utilized by a program.
Symbolic constants are used to specify particular system resources and to
request usage information about either the current process or its children.
-A single exception is defined for errors:
+An :exc:`OSError` is raised on syscall failure.
.. exception:: error
- The functions described below may raise this error if the underlying system call
- failures unexpectedly.
+ A deprecated alias of :exc:`OSError`.
+
+ .. versionchanged:: 3.3
+ Following :pep:`3151`, this class was made an alias of :exc:`OSError`.
Resource Limits
diff --git a/Doc/library/sched.rst b/Doc/library/sched.rst
index 000dba0..d6c86c7 100644
--- a/Doc/library/sched.rst
+++ b/Doc/library/sched.rst
@@ -14,7 +14,7 @@
The :mod:`sched` module defines a class which implements a general purpose event
scheduler:
-.. class:: scheduler(timefunc, delayfunc)
+.. class:: scheduler(timefunc=time.time, delayfunc=time.sleep)
The :class:`scheduler` class defines a generic interface to scheduling events.
It needs two functions to actually deal with the "outside world" --- *timefunc*
@@ -25,6 +25,12 @@ scheduler:
event is run to allow other threads an opportunity to run in multi-threaded
applications.
+ .. versionchanged:: 3.3
+ *timefunc* and *delayfunc* parameters are optional.
+ .. versionchanged:: 3.3
+ :class:`scheduler` class can be safely used in multi-threaded
+ environments.
+
Example::
>>> import sched, time
@@ -44,33 +50,6 @@ Example::
From print_time 930343700.273
930343700.276
-In multi-threaded environments, the :class:`scheduler` class has limitations
-with respect to thread-safety, inability to insert a new task before
-the one currently pending in a running scheduler, and holding up the main
-thread until the event queue is empty. Instead, the preferred approach
-is to use the :class:`threading.Timer` class instead.
-
-Example::
-
- >>> import time
- >>> from threading import Timer
- >>> def print_time():
- ... print("From print_time", time.time())
- ...
- >>> def print_some_times():
- ... print(time.time())
- ... Timer(5, print_time, ()).start()
- ... Timer(10, print_time, ()).start()
- ... time.sleep(11) # sleep while time-delay events execute
- ... print(time.time())
- ...
- >>> print_some_times()
- 930343690.257
- From print_time 930343695.274
- From print_time 930343700.273
- 930343701.301
-
-
.. _scheduler-objects:
Scheduler Objects
@@ -79,26 +58,38 @@ Scheduler Objects
:class:`scheduler` instances have the following methods and attributes:
-.. method:: scheduler.enterabs(time, priority, action, argument)
+.. method:: scheduler.enterabs(time, priority, action, argument=[], kwargs={})
Schedule a new event. The *time* argument should be a numeric type compatible
with the return value of the *timefunc* function passed to the constructor.
Events scheduled for the same *time* will be executed in the order of their
*priority*.
- Executing the event means executing ``action(*argument)``. *argument* must be a
- sequence holding the parameters for *action*.
+ Executing the event means executing ``action(*argument, **kwargs)``.
+ *argument* must be a sequence holding the parameters for *action*.
+ *kwargs* must be a dictionary holding the keyword parameters for *action*.
Return value is an event which may be used for later cancellation of the event
(see :meth:`cancel`).
+ .. versionchanged:: 3.3
+ *argument* parameter is optional.
-.. method:: scheduler.enter(delay, priority, action, argument)
+ .. versionadded:: 3.3
+ *kwargs* parameter was added.
+
+
+.. method:: scheduler.enter(delay, priority, action, argument=[], kwargs={})
Schedule an event for *delay* more time units. Other than the relative time, the
other arguments, the effect and the return value are the same as those for
:meth:`enterabs`.
+ .. versionchanged:: 3.3
+ *argument* parameter is optional.
+
+ .. versionadded:: 3.3
+ *kwargs* parameter was added.
.. method:: scheduler.cancel(event)
@@ -111,12 +102,16 @@ Scheduler Objects
Return true if the event queue is empty.
-.. method:: scheduler.run()
+.. method:: scheduler.run(blocking=True)
- Run all scheduled events. This function will wait (using the :func:`delayfunc`
+ Run all scheduled events. This method will wait (using the :func:`delayfunc`
function passed to the constructor) for the next event, then execute it and so
on until there are no more scheduled events.
+ If *blocking* is false, execute the scheduled events due to expire soonest
+ (if any) and then return the deadline of the next scheduled call in the
+ scheduler (if any).
+
Either *action* or *delayfunc* can raise an exception. In either case, the
scheduler will maintain a consistent state and propagate the exception. If an
exception is raised by *action*, the event will not be attempted in future calls
@@ -127,6 +122,9 @@ Scheduler Objects
the calling code is responsible for canceling events which are no longer
pertinent.
+ .. versionadded:: 3.3
+ *blocking* parameter was added.
+
.. attribute:: scheduler.queue
Read-only attribute returning a list of upcoming events in the order they
diff --git a/Doc/library/select.rst b/Doc/library/select.rst
index a450ec2..4e60f4a 100644
--- a/Doc/library/select.rst
+++ b/Doc/library/select.rst
@@ -6,7 +6,8 @@
This module provides access to the :c:func:`select` and :c:func:`poll` functions
-available in most operating systems, :c:func:`epoll` available on Linux 2.5+ and
+available in most operating systems, :c:func:`devpoll` available on
+Solaris and derivatives, :c:func:`epoll` available on Linux 2.5+ and
:c:func:`kqueue` available on most BSD.
Note that on Windows, it only works for sockets; on other operating systems,
it also works for other file types (in particular, on Unix, it works on pipes).
@@ -18,17 +19,38 @@ The module defines the following:
.. exception:: error
- The exception raised when an error occurs. The accompanying value is a pair
- containing the numeric error code from :c:data:`errno` and the corresponding
- string, as would be printed by the C function :c:func:`perror`.
+ A deprecated alias of :exc:`OSError`.
+ .. versionchanged:: 3.3
+ Following :pep:`3151`, this class was made an alias of :exc:`OSError`.
-.. function:: epoll(sizehint=-1)
- (Only supported on Linux 2.5.44 and newer.) Returns an edge polling object,
- which can be used as Edge or Level Triggered interface for I/O events; see
- section :ref:`epoll-objects` below for the methods supported by epolling
- objects.
+.. function:: devpoll()
+
+ (Only supported on Solaris and derivatives.) Returns a ``/dev/poll``
+ polling object; see section :ref:`devpoll-objects` below for the
+ methods supported by devpoll objects.
+
+ :c:func:`devpoll` objects are linked to the number of file
+ descriptors allowed at the time of instantiation. If your program
+ reduces this value, :c:func:`devpoll` will fail. If your program
+ increases this value, :c:func:`devpoll` may return an
+ incomplete list of active file descriptors.
+
+ .. versionadded:: 3.3
+
+.. function:: epoll(sizehint=-1, flags=0)
+
+ (Only supported on Linux 2.5.44 and newer.) Return an edge polling object,
+ which can be used as Edge or Level Triggered interface for I/O
+ events. *sizehint* is deprecated and completely ignored. *flags* can be set
+ to :const:`EPOLL_CLOEXEC`, which causes the epoll descriptor to be closed
+ automatically when :func:`os.execve` is called. See section
+ :ref:`epoll-objects` below for the methods supported by epolling objects.
+
+
+ .. versionchanged:: 3.3
+ Added the *flags* parameter.
.. function:: poll()
@@ -106,6 +128,74 @@ The module defines the following:
.. versionadded:: 3.2
+.. _devpoll-objects:
+
+``/dev/poll`` Polling Objects
+----------------------------------------------
+
+ http://developers.sun.com/solaris/articles/using_devpoll.html
+ http://developers.sun.com/solaris/articles/polling_efficient.html
+
+Solaris and derivatives have ``/dev/poll``. While :c:func:`select` is
+O(highest file descriptor) and :c:func:`poll` is O(number of file
+descriptors), ``/dev/poll`` is O(active file descriptors).
+
+``/dev/poll`` behaviour is very close to the standard :c:func:`poll`
+object.
+
+
+.. method:: devpoll.register(fd[, eventmask])
+
+ Register a file descriptor with the polling object. Future calls to the
+ :meth:`poll` method will then check whether the file descriptor has any pending
+ I/O events. *fd* can be either an integer, or an object with a :meth:`fileno`
+ method that returns an integer. File objects implement :meth:`fileno`, so they
+ can also be used as the argument.
+
+ *eventmask* is an optional bitmask describing the type of events you want to
+ check for. The constants are the same as with the :c:func:`poll`
+ object. The default value is a combination of the constants :const:`POLLIN`,
+ :const:`POLLPRI`, and :const:`POLLOUT`.
+
+ .. warning::
+
+ Registering a file descriptor that's already registered is not an
+ error, but the result is undefined. The appropriate action is to
+ unregister or modify it first. This is an important difference
+ compared with :c:func:`poll`.
+
+
+.. method:: devpoll.modify(fd[, eventmask])
+
+ This method does an :meth:`unregister` followed by a
+ :meth:`register`. It is (a bit) more efficient than doing the same
+ explicitly.
+
+
+.. method:: devpoll.unregister(fd)
+
+ Remove a file descriptor being tracked by a polling object. Just like the
+ :meth:`register` method, *fd* can be an integer or an object with a
+ :meth:`fileno` method that returns an integer.
+
+ Attempting to remove a file descriptor that was never registered is
+ safely ignored.
+
+
+.. method:: devpoll.poll([timeout])
+
+ Polls the set of registered file descriptors, and returns a possibly-empty list
+ containing ``(fd, event)`` 2-tuples for the descriptors that have events or
+ errors to report. *fd* is the file descriptor, and *event* is a bitmask with
+ bits set for the reported events for that descriptor --- :const:`POLLIN` for
+ waiting input, :const:`POLLOUT` to indicate that the descriptor can be written
+ to, and so forth. An empty list indicates that the call timed out and no file
+ descriptors had any events to report. If *timeout* is given, it specifies the
+ length of time in milliseconds which the system will wait for events before
+ returning. If *timeout* is omitted, -1, or :const:`None`, the call will
+ block until there is an event for this poll object.
+
+
.. _epoll-objects:
Edge and Level Trigger Polling (epoll) Objects
@@ -165,11 +255,6 @@ Edge and Level Trigger Polling (epoll) Objects
Register a fd descriptor with the epoll object.
- .. note::
-
- Registering a file descriptor that's already registered raises an
- IOError -- contrary to :ref:`poll-objects`'s register.
-
.. method:: epoll.modify(fd, eventmask)
diff --git a/Doc/library/shlex.rst b/Doc/library/shlex.rst
index 0113fb7..941e090 100644
--- a/Doc/library/shlex.rst
+++ b/Doc/library/shlex.rst
@@ -34,6 +34,40 @@ The :mod:`shlex` module defines the following functions:
passing ``None`` for *s* will read the string to split from standard
input.
+
+.. function:: quote(s)
+
+ Return a shell-escaped version of the string *s*. The returned value is a
+ string that can safely be used as one token in a shell command line, for
+ cases where you cannot use a list.
+
+ This idiom would be unsafe::
+
+ >>> filename = 'somefile; rm -rf ~'
+ >>> command = 'ls -l {}'.format(filename)
+ >>> print(command) # executed by a shell: boom!
+ ls -l somefile; rm -rf ~
+
+ :func:`quote` lets you plug the security hole::
+
+ >>> command = 'ls -l {}'.format(quote(filename))
+ >>> print(command)
+ ls -l 'somefile; rm -rf ~'
+ >>> remote_command = 'ssh home {}'.format(quote(command))
+ >>> print(remote_command)
+ ssh home 'ls -l '"'"'somefile; rm -rf ~'"'"''
+
+ The quoting is compatible with UNIX shells and with :func:`split`:
+
+ >>> remote_command = split(remote_command)
+ >>> remote_command
+ ['ssh', 'home', "ls -l 'somefile; rm -rf ~'"]
+ >>> command = split(remote_command[-1])
+ >>> command
+ ['ls', '-l', 'somefile; rm -rf ~']
+
+ .. versionadded:: 3.3
+
The :mod:`shlex` module defines the following class:
@@ -282,5 +316,4 @@ parsing rules.
* EOF is signaled with a :const:`None` value;
-* Quoted empty strings (``''``) are allowed;
-
+* Quoted empty strings (``''``) are allowed.
diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst
index 18f6485..080c923 100644
--- a/Doc/library/shutil.rst
+++ b/Doc/library/shutil.rst
@@ -47,45 +47,129 @@ Directory and files operations
be copied.
-.. function:: copyfile(src, dst)
+.. function:: copyfile(src, dst, *, follow_symlinks=True)
Copy the contents (no metadata) of the file named *src* to a file named
- *dst*. *dst* must be the complete target file name; look at
- :func:`shutil.copy` for a copy that accepts a target directory path. If
- *src* and *dst* are the same files, :exc:`Error` is raised.
- The destination location must be writable; otherwise, an :exc:`IOError` exception
- will be raised. If *dst* already exists, it will be replaced. Special files
- such as character or block devices and pipes cannot be copied with this
- function. *src* and *dst* are path names given as strings.
+ *dst* and return *dst*. *src* and *dst* are path names given as strings.
+ *dst* must be the complete target file name; look at :func:`shutil.copy`
+ for a copy that accepts a target directory path. If *src* and *dst*
+ specify the same file, :exc:`Error` is raised.
+ The destination location must be writable; otherwise, an :exc:`OSError`
+ exception will be raised. If *dst* already exists, it will be replaced.
+ Special files such as character or block devices and pipes cannot be
+ copied with this function.
-.. function:: copymode(src, dst)
+ If *follow_symlinks* is false and *src* is a symbolic link,
+ a new symbolic link will be created instead of copying the
+ file *src* points to.
+
+ .. versionchanged:: 3.3
+ :exc:`IOError` used to be raised instead of :exc:`OSError`.
+ Added *follow_symlinks* argument.
+ Now returns *dst*.
+
+.. function:: copymode(src, dst, *, follow_symlinks=True)
Copy the permission bits from *src* to *dst*. The file contents, owner, and
group are unaffected. *src* and *dst* are path names given as strings.
+ If *follow_symlinks* is false, and both *src* and *dst* are symbolic links,
+ :func:`copymode` will attempt to modify the mode of *dst* itself (rather
+ than the file it points to). This functionality is not available on every
+ platform; please see :func:`copystat` for more information. If
+ :func:`copymode` cannot modify symbolic links on the local platform, and it
+ is asked to do so, it will do nothing and return.
+
+ .. versionchanged:: 3.3
+ Added *follow_symlinks* argument.
+
+.. function:: copystat(src, dst, *, follow_symlinks=True)
+
+ Copy the permission bits, last access time, last modification time, and
+ flags from *src* to *dst*. On Linux, :func:`copystat` also copies the
+ "extended attributes" where possible. The file contents, owner, and
+ group are unaffected. *src* and *dst* are path names given as strings.
+
+ If *follow_symlinks* is false, and *src* and *dst* both
+ refer to symbolic links, :func:`copystat` will operate on
+ the symbolic links themselves rather than the files the
+ symbolic links refer to--reading the information from the
+ *src* symbolic link, and writing the information to the
+ *dst* symbolic link.
+
+ .. note::
+
+ Not all platforms provide the ability to examine and
+ modify symbolic links. Python itself can tell you what
+ functionality is locally available.
+ * If ``os.chmod in os.supports_follow_symlinks`` is
+ ``True``, :func:`copystat` can modify the permission
+ bits of a symbolic link.
-.. function:: copystat(src, dst)
+ * If ``os.utime in os.supports_follow_symlinks`` is
+ ``True``, :func:`copystat` can modify the last access
+ and modification times of a symbolic link.
- Copy the permission bits, last access time, last modification time, and flags
- from *src* to *dst*. The file contents, owner, and group are unaffected. *src*
- and *dst* are path names given as strings.
+ * If ``os.chflags in os.supports_follow_symlinks`` is
+ ``True``, :func:`copystat` can modify the flags of
+ a symbolic link. (``os.chflags`` is not available on
+ all platforms.)
+ On platforms where some or all of this functionality
+ is unavailable, when asked to modify a symbolic link,
+ :func:`copystat` will copy everything it can.
+ :func:`copystat` never returns failure.
-.. function:: copy(src, dst)
+ Please see :data:`os.supports_follow_symlinks`
+ for more information.
- Copy the file *src* to the file or directory *dst*. If *dst* is a directory, a
- file with the same basename as *src* is created (or overwritten) in the
- directory specified. Permission bits are copied. *src* and *dst* are path
- names given as strings.
+ .. versionchanged:: 3.3
+ Added *follow_symlinks* argument and support for Linux extended attributes.
+.. function:: copy(src, dst, *, follow_symlinks=True)
-.. function:: copy2(src, dst)
+ Copies the file *src* to the file or directory *dst*. *src* and *dst*
+ should be strings. If *dst* specifies a directory, the file will be
+ copied into *dst* using the base filename from *src*. Returns the
+ path to the newly created file.
- Similar to :func:`shutil.copy`, but metadata is copied as well -- in fact,
- this is just :func:`shutil.copy` followed by :func:`copystat`. This is
- similar to the Unix command :program:`cp -p`.
+ If *follow_symlinks* is false, and *src* is a symbolic link,
+ *dst* will be created as a symbolic link. If *follow_symlinks*
+ is true and *src* is a symbolic link, *dst* will be a copy of
+ the file *src* refers to.
+ :func:`copy` copies the file data and the file's permission
+ mode (see :func:`os.chmod`). Other metadata, like the
+ file's creation and modification times, is not preserved.
+ To preserve all file metadata from the original, use
+ :func:`~shutil.copy2` instead.
+
+ .. versionchanged:: 3.3
+ Added *follow_symlinks* argument.
+ Now returns path to the newly created file.
+
+.. function:: copy2(src, dst, *, follow_symlinks=True)
+
+ Identical to :func:`~shutil.copy` except that :func:`copy2`
+ also attempts to preserve all file metadata.
+
+ When *follow_symlinks* is false, and *src* is a symbolic
+ link, :func:`copy2` attempts to copy all metadata from the
+ *src* symbolic link to the newly-created *dst* symbolic link.
+ However, this functionality is not available on all platforms.
+ On platforms where some or all of this functionality is
+ unavailable, :func:`copy2` will preserve all the metadata
+ it can; :func:`copy2` never returns failure.
+
+ :func:`copy2` uses :func:`copystat` to copy the file metadata.
+ Please see :func:`copystat` for more information
+ about platform support for modifying symbolic link metadata.
+
+ .. versionchanged:: 3.3
+ Added *follow_symlinks* argument, try to copy extended
+ file system attributes too (currently Linux only).
+ Now returns path to the newly created file.
.. function:: ignore_patterns(\*patterns)
@@ -96,16 +180,17 @@ Directory and files operations
.. function:: copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=False)
- Recursively copy an entire directory tree rooted at *src*. The destination
+ Recursively copy an entire directory tree rooted at *src*, returning the
+ destination directory. The destination
directory, named by *dst*, must not already exist; it will be created as
well as missing parent directories. Permissions and times of directories
are copied with :func:`copystat`, individual files are copied using
:func:`shutil.copy2`.
If *symlinks* is true, symbolic links in the source tree are represented as
- symbolic links in the new tree, but the metadata of the original links is NOT
- copied; if false or omitted, the contents and metadata of the linked files
- are copied to the new tree.
+ symbolic links in the new tree and the metadata of the original links will
+ be copied as far as the platform allows; if false or omitted, the contents
+ and metadata of the linked files are copied to the new tree.
When *symlinks* is false, if the file pointed by the symlink doesn't
exist, an exception will be added to the list of errors raised in
@@ -129,13 +214,15 @@ Directory and files operations
If *copy_function* is given, it must be a callable that will be used to copy
each file. It will be called with the source path and the destination path
as arguments. By default, :func:`shutil.copy2` is used, but any function
- that supports the same signature (like :func:`copy`) can be used.
+ that supports the same signature (like :func:`shutil.copy`) can be used.
+
+ .. versionchanged:: 3.3
+ Copy metadata of symbolic links when *symlinks* is true.
+ Now returns *dst*.
.. versionchanged:: 3.2
Added the *copy_function* argument to be able to provide a custom copy
function.
-
- .. versionchanged:: 3.2
Added the *ignore_dangling_symlinks* argument to silence dangling symlink
errors when *symlinks* is false.
@@ -150,19 +237,42 @@ Directory and files operations
handled by calling a handler specified by *onerror* or, if that is omitted,
they raise an exception.
+ .. note::
+
+ On platforms that support the necessary fd-based functions a symlink
+ attack resistant version of :func:`rmtree` is used by default. On other
+ platforms, the :func:`rmtree` implementation is susceptible to a symlink
+ attack: given proper timing and circumstances, attackers can manipulate
+ symlinks on the filesystem to delete files they wouldn't be able to access
+ otherwise. Applications can use the :data:`rmtree.avoids_symlink_attacks`
+ function attribute to determine which case applies.
+
If *onerror* is provided, it must be a callable that accepts three
- parameters: *function*, *path*, and *excinfo*. The first parameter,
- *function*, is the function which raised the exception; it will be
- :func:`os.path.islink`, :func:`os.listdir`, :func:`os.remove` or
- :func:`os.rmdir`. The second parameter, *path*, will be the path name passed
- to *function*. The third parameter, *excinfo*, will be the exception
- information return by :func:`sys.exc_info`. Exceptions raised by *onerror*
- will not be caught.
+ parameters: *function*, *path*, and *excinfo*.
+
+ The first parameter, *function*, is the function which raised the exception;
+ it depends on the platform and implementation. The second parameter,
+ *path*, will be the path name passed to *function*. The third parameter,
+ *excinfo*, will be the exception information returned by
+ :func:`sys.exc_info`. Exceptions raised by *onerror* will not be caught.
+
+ .. versionchanged:: 3.3
+ Added a symlink attack resistant version that is used automatically
+ if platform supports fd-based functions.
+
+ .. attribute:: rmtree.avoids_symlink_attacks
+
+ Indicates whether the current platform and implementation provides a
+ symlink attack resistant version of :func:`rmtree`. Currently this is
+ only true for platforms supporting fd-based directory access functions.
+
+ .. versionadded:: 3.3
.. function:: move(src, dst)
- Recursively move a file or directory (*src*) to another location (*dst*).
+ Recursively move a file or directory (*src*) to another location (*dst*)
+ and return the destination.
If the destination is a directory or a symlink to a directory, then *src* is
moved inside that directory.
@@ -173,7 +283,61 @@ Directory and files operations
If the destination is on the current filesystem, then :func:`os.rename` is
used. Otherwise, *src* is copied (using :func:`shutil.copy2`) to *dst* and
- then removed.
+ then removed. In case of symlinks, a new symlink pointing to the target of
+ *src* will be created in or as *dst* and *src* will be removed.
+
+ .. versionchanged:: 3.3
+ Added explicit symlink handling for foreign filesystems, thus adapting
+ it to the behavior of GNU's :program:`mv`.
+ Now returns *dst*.
+
+.. function:: disk_usage(path)
+
+ Return disk usage statistics about the given path as a :term:`named tuple`
+ with the attributes *total*, *used* and *free*, which are the amount of
+ total, used and free space, in bytes.
+
+ .. versionadded:: 3.3
+
+ Availability: Unix, Windows.
+
+.. function:: chown(path, user=None, group=None)
+
+ Change owner *user* and/or *group* of the given *path*.
+
+ *user* can be a system user name or a uid; the same applies to *group*. At
+ least one argument is required.
+
+ See also :func:`os.chown`, the underlying function.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: which(cmd, mode=os.F_OK | os.X_OK, path=None)
+
+ Return the path to an executable which would be run if the given *cmd* was
+ called. If no *cmd* would be called, return ``None``.
+
+ *mode* is a permission mask passed to :func:`os.access`, by default
+ determining if the file exists and is executable.
+
+ When no *path* is specified, :data:`os.environ` is consulted, using either
+ the "PATH" value or a fallback of :attr:`os.defpath`.
+
+ On Windows, the current directory is always prepended to the *path* whether
+ or not you use the default or provide your own, which is the behavior the
+ command shell uses when finding executables. Additionally, when finding the
+ *cmd* in the *path*, the ``PATHEXT`` environment variable is checked. For
+ example, if you call ``shutil.which("python")``, :func:`which` will search
+ ``PATHEXT`` to know that it should look for ``python.exe`` within the *path*
+ directories. For example, on Windows::
+
+ >>> shutil.which("python")
+ 'c:\\python33\\python.exe'
+
+ .. versionadded:: 3.3
.. exception:: Error
@@ -186,7 +350,7 @@ Directory and files operations
.. _shutil-copytree-example:
copytree example
-::::::::::::::::
+~~~~~~~~~~~~~~~~
This example is the implementation of the :func:`copytree` function, described
above, with the docstring omitted. It demonstrates many of the other functions
@@ -250,6 +414,8 @@ Another example that uses the *ignore* argument to add a logging call::
Archiving operations
--------------------
+.. versionadded:: 3.2
+
High-level utilities to create and read compressed and archived files are also
provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
@@ -277,8 +443,6 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
*logger* must be an object compatible with :pep:`282`, usually an instance of
:class:`logging.Logger`.
- .. versionadded:: 3.2
-
.. function:: get_archive_formats()
@@ -295,8 +459,6 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
You can register new formats or provide your own archiver for any existing
formats, by using :func:`register_archive_format`.
- .. versionadded:: 3.2
-
.. function:: register_archive_format(name, function, [extra_args, [description]])
@@ -309,15 +471,11 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
*description* is used by :func:`get_archive_formats` which returns the
list of archivers. Defaults to an empty list.
- .. versionadded:: 3.2
-
.. function:: unregister_archive_format(name)
Remove the archive format *name* from the list of supported formats.
- .. versionadded:: 3.2
-
.. function:: unpack_archive(filename[, extract_dir[, format]])
@@ -332,8 +490,6 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
and see if an unpacker was registered for that extension. In case none is
found, a :exc:`ValueError` is raised.
- .. versionadded:: 3.2
-
.. function:: register_unpack_format(name, extensions, function[, extra_args[, description]])
@@ -351,15 +507,11 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
*description* can be provided to describe the format, and will be returned
by the :func:`get_unpack_formats` function.
- .. versionadded:: 3.2
-
.. function:: unregister_unpack_format(name)
Unregister an unpack format. *name* is the name of the format.
- .. versionadded:: 3.2
-
.. function:: get_unpack_formats()
@@ -377,13 +529,11 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
You can register new formats or provide your own unpacker for any existing
formats, by using :func:`register_unpack_format`.
- .. versionadded:: 3.2
-
.. _shutil-archiving-example:
Archiving example
-:::::::::::::::::
+~~~~~~~~~~~~~~~~~
In this example, we create a gzip'ed tar-file archive containing all files
found in the :file:`.ssh` directory of the user::
@@ -406,3 +556,36 @@ The resulting archive contains::
-rw------- tarek/staff 1675 2008-06-09 13:26:54 ./id_rsa
-rw-r--r-- tarek/staff 397 2008-06-09 13:26:54 ./id_rsa.pub
-rw-r--r-- tarek/staff 37192 2010-02-06 18:23:10 ./known_hosts
+
+
+Querying the size of the output terminal
+----------------------------------------
+
+.. versionadded:: 3.3
+
+.. function:: get_terminal_size(fallback=(columns, lines))
+
+ Get the size of the terminal window.
+
+ For each of the two dimensions, the environment variable, ``COLUMNS``
+ and ``LINES`` respectively, is checked. If the variable is defined and
+ the value is a positive integer, it is used.
+
+ When ``COLUMNS`` or ``LINES`` is not defined, which is the common case,
+ the terminal connected to :data:`sys.__stdout__` is queried
+ by invoking :func:`os.get_terminal_size`.
+
+ If the terminal size cannot be successfully queried, either because
+ the system doesn't support querying, or because we are not
+ connected to a terminal, the value given in ``fallback`` parameter
+ is used. ``fallback`` defaults to ``(80, 24)`` which is the default
+ size used by many terminal emulators.
+
+ The value returned is a named tuple of type :class:`os.terminal_size`.
+
+ See also: The Single UNIX Specification, Version 2,
+ `Other Environment Variables`_.
+
+.. _`Other Environment Variables`:
+ http://pubs.opengroup.org/onlinepubs/7908799/xbd/envvar.html#tag_002_003
+
diff --git a/Doc/library/signal.rst b/Doc/library/signal.rst
index d1cae13..2a472fe 100644
--- a/Doc/library/signal.rst
+++ b/Doc/library/signal.rst
@@ -44,6 +44,9 @@ This has consequences:
signal handlers will be called when the calculation finishes.
+.. _signals-and-threads:
+
+
Signals and threads
^^^^^^^^^^^^^^^^^^^
@@ -131,6 +134,28 @@ The variables defined in the :mod:`signal` module are:
in user and kernel space. SIGPROF is delivered upon expiration.
+.. data:: SIG_BLOCK
+
+ A possible value for the *how* parameter to :func:`pthread_sigmask`
+ indicating that signals are to be blocked.
+
+ .. versionadded:: 3.3
+
+.. data:: SIG_UNBLOCK
+
+ A possible value for the *how* parameter to :func:`pthread_sigmask`
+ indicating that signals are to be unblocked.
+
+ .. versionadded:: 3.3
+
+.. data:: SIG_SETMASK
+
+ A possible value for the *how* parameter to :func:`pthread_sigmask`
+ indicating that the signal mask is to be replaced.
+
+ .. versionadded:: 3.3
+
+
The :mod:`signal` module defines one exception:
.. exception:: ItimerError
@@ -138,7 +163,11 @@ The :mod:`signal` module defines one exception:
Raised to signal an error from the underlying :func:`setitimer` or
:func:`getitimer` implementation. Expect this error if an invalid
interval timer or a negative time is passed to :func:`setitimer`.
- This error is a subtype of :exc:`IOError`.
+ This error is a subtype of :exc:`OSError`.
+
+ .. versionchanged:: 3.3
+ This error used to be a subtype of :exc:`IOError`, which is now an
+ alias of :exc:`OSError`.
The :mod:`signal` module defines the following functions:
@@ -172,6 +201,65 @@ The :mod:`signal` module defines the following functions:
will then be called. Returns nothing. Not on Windows. (See the Unix man page
:manpage:`signal(2)`.)
+ See also :func:`sigwait`, :func:`sigwaitinfo`, :func:`sigtimedwait` and
+ :func:`sigpending`.
+
+
+.. function:: pthread_kill(thread_id, signum)
+
+ Send the signal *signum* to the thread *thread_id*, another thread in the
+ same process as the caller. The target thread can be executing any code
+ (Python or not). However, if the target thread is executing the Python
+ interpreter, the Python signal handlers will be :ref:`executed by the main
+ thread <signals-and-threads>`. Therefore, the only point of sending a signal to a particular
+ Python thread would be to force a running system call to fail with
+ :exc:`InterruptedError`.
+
+ Use :func:`threading.get_ident` or the :attr:`~threading.Thread.ident`
+ attribute of :class:`threading.Thread` objects to get a suitable value
+ for *thread_id*.
+
+ If *signum* is 0, then no signal is sent, but error checking is still
+ performed; this can be used to check if the target thread is still running.
+
+ Availability: Unix (see the man page :manpage:`pthread_kill(3)` for further
+ information).
+
+ See also :func:`os.kill`.
+
+ .. versionadded:: 3.3
+
+
+.. function:: pthread_sigmask(how, mask)
+
+ Fetch and/or change the signal mask of the calling thread. The signal mask
+ is the set of signals whose delivery is currently blocked for the caller.
+ Return the old signal mask as a set of signals.
+
+ The behavior of the call is dependent on the value of *how*, as follows.
+
+ * :data:`SIG_BLOCK`: The set of blocked signals is the union of the current
+ set and the *mask* argument.
+ * :data:`SIG_UNBLOCK`: The signals in *mask* are removed from the current
+ set of blocked signals. It is permissible to attempt to unblock a
+ signal which is not blocked.
+ * :data:`SIG_SETMASK`: The set of blocked signals is set to the *mask*
+ argument.
+
+ *mask* is a set of signal numbers (e.g. {:const:`signal.SIGINT`,
+ :const:`signal.SIGTERM`}). Use ``range(1, signal.NSIG)`` for a full mask
+ including all signals.
+
+ For example, ``signal.pthread_sigmask(signal.SIG_BLOCK, [])`` reads the
+ signal mask of the calling thread.
+
+ Availability: Unix. See the man page :manpage:`sigprocmask(3)` and
+ :manpage:`pthread_sigmask(3)` for further information.
+
+ See also :func:`pause`, :func:`sigpending` and :func:`sigwait`.
+
+ .. versionadded:: 3.3
+
.. function:: setitimer(which, seconds[, interval])
@@ -201,13 +289,17 @@ The :mod:`signal` module defines the following functions:
.. function:: set_wakeup_fd(fd)
- Set the wakeup fd to *fd*. When a signal is received, a ``'\0'`` byte is
- written to the fd. This can be used by a library to wakeup a poll or select
- call, allowing the signal to be fully processed.
+ Set the wakeup file descriptor to *fd*. When a signal is received, the
+ signal number is written as a single byte into the fd. This can be used by
+ a library to wakeup a poll or select call, allowing the signal to be fully
+ processed.
The old wakeup fd is returned. *fd* must be non-blocking. It is up to the
library to remove any bytes before calling poll or select again.
+ Use for example ``struct.unpack('%uB' % len(data), data)`` to decode the
+ signal numbers list.
+
When threads are enabled, this function can only be called from the main thread;
attempting to call it from other threads will cause a :exc:`ValueError`
exception to be raised.
@@ -247,6 +339,73 @@ The :mod:`signal` module defines the following functions:
:const:`SIGTERM`. A :exc:`ValueError` will be raised in any other case.
+.. function:: sigpending()
+
+ Examine the set of signals that are pending for delivery to the calling
+ thread (i.e., the signals which have been raised while blocked). Return the
+ set of the pending signals.
+
+ Availability: Unix (see the man page :manpage:`sigpending(2)` for further
+ information).
+
+ See also :func:`pause`, :func:`pthread_sigmask` and :func:`sigwait`.
+
+ .. versionadded:: 3.3
+
+
+.. function:: sigwait(sigset)
+
+ Suspend execution of the calling thread until the delivery of one of the
+ signals specified in the signal set *sigset*. The function accepts the signal
+ (removes it from the pending list of signals), and returns the signal number.
+
+ Availability: Unix (see the man page :manpage:`sigwait(3)` for further
+ information).
+
+ See also :func:`pause`, :func:`pthread_sigmask`, :func:`sigpending`,
+ :func:`sigwaitinfo` and :func:`sigtimedwait`.
+
+ .. versionadded:: 3.3
+
+
+.. function:: sigwaitinfo(sigset)
+
+ Suspend execution of the calling thread until the delivery of one of the
+ signals specified in the signal set *sigset*. The function accepts the
+ signal and removes it from the pending list of signals. If one of the
+ signals in *sigset* is already pending for the calling thread, the function
+ will return immediately with information about that signal. The signal
+ handler is not called for the delivered signal. The function raises an
+ :exc:`InterruptedError` if it is interrupted by a signal that is not in
+ *sigset*.
+
+ The return value is an object representing the data contained in the
+ :c:type:`siginfo_t` structure, namely: :attr:`si_signo`, :attr:`si_code`,
+ :attr:`si_errno`, :attr:`si_pid`, :attr:`si_uid`, :attr:`si_status`,
+ :attr:`si_band`.
+
+ Availability: Unix (see the man page :manpage:`sigwaitinfo(2)` for further
+ information).
+
+ See also :func:`pause`, :func:`sigwait` and :func:`sigtimedwait`.
+
+ .. versionadded:: 3.3
+
+
+.. function:: sigtimedwait(sigset, timeout)
+
+ Like :func:`sigwaitinfo`, but takes an additional *timeout* argument
+ specifying a timeout. If *timeout* is specified as :const:`0`, a poll is
+ performed. Returns :const:`None` if a timeout occurs.
+
+ Availability: Unix (see the man page :manpage:`sigtimedwait(2)` for further
+ information).
+
+ See also :func:`pause`, :func:`sigwait` and :func:`sigwaitinfo`.
+
+ .. versionadded:: 3.3
+
+
.. _signal-example:
Example
@@ -263,7 +422,7 @@ be sent, and the handler raises an exception. ::
def handler(signum, frame):
print('Signal handler called with signal', signum)
- raise IOError("Couldn't open device!")
+ raise OSError("Couldn't open device!")
# Set the signal handler and a 5-second alarm
signal.signal(signal.SIGALRM, handler)
diff --git a/Doc/library/site.rst b/Doc/library/site.rst
index 579571a..36b80c3 100644
--- a/Doc/library/site.rst
+++ b/Doc/library/site.rst
@@ -16,7 +16,14 @@ import can be suppressed using the interpreter's :option:`-S` option.
.. index:: triple: module; search; path
Importing this module will append site-specific paths to the module search path
-and add a few builtins.
+and add a few builtins, unless :option:`-S` was used. In that case, this module
+can be safely imported with no automatic modifications to the module search path
+or additions to the builtins. To explicitly trigger the usual site-specific
+additions, call the :func:`site.main` function.
+
+.. versionchanged:: 3.3
+ Importing the module used to trigger paths manipulation even when using
+ :option:`-S`.
.. index::
pair: site-python; directory
@@ -31,6 +38,15 @@ Unix and Macintosh). For each of the distinct head-tail combinations, it sees
if it refers to an existing directory, and if so, adds it to ``sys.path`` and
also inspects the newly added path for configuration files.
+If a file named "pyvenv.cfg" exists one directory above sys.executable,
+sys.prefix and sys.exec_prefix are set to that directory and
+it is also checked for site-packages and site-python (sys.base_prefix and
+sys.base_exec_prefix will always be the "real" prefixes of the Python
+installation). If "pyvenv.cfg" (a bootstrap configuration file) contains
+the key "include-system-site-packages" set to anything other than "false"
+(case-insensitive), the system-level prefixes will still also be
+searched for site-packages; otherwise they won't.
+
A path configuration file is a file whose name has the form :file:`{name}.pth`
and exists in one of the four directories mentioned above; its contents are
additional items (one per line) to be added to ``sys.path``. Non-existing items
@@ -129,8 +145,19 @@ empty, and the path manipulations are skipped; however the import of
:file:`~/Library/Python/{X.Y}` for Mac framework builds, and
:file:`{%APPDATA%}\\Python` for Windows. This value is used by Distutils to
compute the installation directories for scripts, data files, Python modules,
- etc. for the :ref:`user installation scheme <inst-alt-install-user>`. See
- also :envvar:`PYTHONUSERBASE`.
+ etc. for the :ref:`user installation scheme <inst-alt-install-user>`.
+ See also :envvar:`PYTHONUSERBASE`.
+
+
+.. function:: main()
+
+ Adds all the standard site-specific directories to the module search
+ path. This function is called automatically when this module is imported,
+ unless the :program:`python` interpreter was started with the :option:`-S`
+ flag.
+
+ .. versionchanged:: 3.3
+ This function used to be called unconditionally.
.. function:: addsitedir(sitedir, known_paths=None)
diff --git a/Doc/library/smtpd.rst b/Doc/library/smtpd.rst
index bfdc727..2ca71ff 100644
--- a/Doc/library/smtpd.rst
+++ b/Doc/library/smtpd.rst
@@ -20,17 +20,24 @@ specific mail-sending strategies.
Additionally the SMTPChannel may be extended to implement very specific
interaction behaviour with SMTP clients.
+The code supports :RFC:`5321`, plus the :rfc:`1870` SIZE extension.
+
+
SMTPServer Objects
------------------
-.. class:: SMTPServer(localaddr, remoteaddr)
+.. class:: SMTPServer(localaddr, remoteaddr, data_size_limit=33554432)
Create a new :class:`SMTPServer` object, which binds to local address
*localaddr*. It will treat *remoteaddr* as an upstream SMTP relayer. It
inherits from :class:`asyncore.dispatcher`, and so will insert itself into
:mod:`asyncore`'s event loop on instantiation.
+ *data_size_limit* specifies the maximum number of bytes that will be
+ accepted in a ``DATA`` command. A value of ``None`` or ``0`` means no
+ limit.
+
.. method:: process_message(peer, mailfrom, rcpttos, data)
Raise :exc:`NotImplementedError` exception. Override this in subclasses to
@@ -156,11 +163,15 @@ SMTPChannel Objects
Command Action taken
======== ===================================================================
HELO Accepts the greeting from the client and stores it in
- :attr:`seen_greeting`.
+ :attr:`seen_greeting`. Sets server to base command mode.
+ EHLO Accepts the greeting from the client and stores it in
+ :attr:`seen_greeting`. Sets server to extended command mode.
NOOP Takes no action.
QUIT Closes the connection cleanly.
MAIL Accepts the "MAIL FROM:" syntax and stores the supplied address as
- :attr:`mailfrom`.
+ :attr:`mailfrom`. In extended command mode, accepts the
+ :rfc:`1870` SIZE attribute and responds appropriately based on the
+ value of *data_size_limit*.
RCPT Accepts the "RCPT TO:" syntax and stores the supplied addresses in
the :attr:`rcpttos` list.
RSET Resets the :attr:`mailfrom`, :attr:`rcpttos`, and
@@ -168,4 +179,7 @@ SMTPChannel Objects
DATA Sets the internal state to :attr:`DATA` and stores remaining lines
from the client in :attr:`received_data` until the terminator
``"\r\n.\r\n"`` is received.
+ HELP Returns minimal information on command syntax
+ VRFY Returns code 252 (the server doesn't know if the address is valid)
+ EXPN Reports that the command is not implemented.
======== ===================================================================
diff --git a/Doc/library/smtplib.rst b/Doc/library/smtplib.rst
index 3101ab7..711981f 100644
--- a/Doc/library/smtplib.rst
+++ b/Doc/library/smtplib.rst
@@ -20,7 +20,7 @@ details of SMTP and ESMTP operation, consult :rfc:`821` (Simple Mail Transfer
Protocol) and :rfc:`1869` (SMTP Service Extensions).
-.. class:: SMTP(host='', port=0, local_hostname=None[, timeout])
+.. class:: SMTP(host='', port=0, local_hostname=None[, timeout], source_address=None)
A :class:`SMTP` instance encapsulates an SMTP connection. It has methods
that support a full repertoire of SMTP and ESMTP operations. If the optional
@@ -29,13 +29,34 @@ Protocol) and :rfc:`1869` (SMTP Service Extensions).
raised if the specified host doesn't respond correctly. The optional
*timeout* parameter specifies a timeout in seconds for blocking operations
like the connection attempt (if not specified, the global default timeout
- setting will be used).
+ setting will be used). The optional *source_address* parameter allows binding to some
+ specific source address in a machine with multiple network interfaces,
+ and/or to some specific source TCP port. It takes a 2-tuple (host, port),
+ for the socket to bind to as its source address before connecting. If
+ omitted (or if host or port are ``''`` and/or 0 respectively) the OS default
+ behavior will be used.
For normal use, you should only require the initialization/connect,
:meth:`sendmail`, and :meth:`quit` methods. An example is included below.
+ The :class:`SMTP` class supports the :keyword:`with` statement. When used
+ like this, the SMTP ``QUIT`` command is issued automatically when the
+ :keyword:`with` statement exits. E.g.::
-.. class:: SMTP_SSL(host='', port=0, local_hostname=None, keyfile=None, certfile=None[, timeout])
+ >>> from smtplib import SMTP
+ >>> with SMTP("domain.org") as smtp:
+ ... smtp.noop()
+ ...
+ (250, b'Ok')
+ >>>
+
+ .. versionchanged:: 3.3
+ Support for the :keyword:`with` statement was added.
+
+ .. versionchanged:: 3.3
+ source_address argument was added.
+
+.. class:: SMTP_SSL(host='', port=0, local_hostname=None, keyfile=None, certfile=None[, timeout], context=None, source_address=None)
A :class:`SMTP_SSL` instance behaves exactly the same as instances of
:class:`SMTP`. :class:`SMTP_SSL` should be used for situations where SSL is
@@ -43,18 +64,33 @@ Protocol) and :rfc:`1869` (SMTP Service Extensions).
not appropriate. If *host* is not specified, the local host is used. If
*port* is zero, the standard SMTP-over-SSL port (465) is used. *keyfile*
and *certfile* are also optional, and can contain a PEM formatted private key
- and certificate chain file for the SSL connection. The optional *timeout*
+ and certificate chain file for the SSL connection. *context*, also optional, can contain
+ an :class:`ssl.SSLContext` and is an alternative to *keyfile* and *certfile*; if it is specified, both
+ *keyfile* and *certfile* must be ``None``. The optional *timeout*
parameter specifies a timeout in seconds for blocking operations like the
connection attempt (if not specified, the global default timeout setting
- will be used).
+ will be used). The optional *source_address* parameter allows binding to some
+ specific source address in a machine with multiple network interfaces,
+ and/or to some specific source TCP port. It takes a 2-tuple (host, port),
+ for the socket to bind to as its source address before connecting. If
+ omitted (or if host or port are ``''`` and/or 0 respectively) the OS default
+ behavior will be used.
+
+ .. versionchanged:: 3.3
+ *context* was added.
+ .. versionchanged:: 3.3
+ source_address argument was added.
-.. class:: LMTP(host='', port=LMTP_PORT, local_hostname=None)
+
+.. class:: LMTP(host='', port=LMTP_PORT, local_hostname=None, source_address=None)
The LMTP protocol, which is very similar to ESMTP, is heavily based on the
- standard SMTP client. It's common to use Unix sockets for LMTP, so our :meth:`connect`
- method must support that as well as a regular host:port server. To specify a
- Unix socket, you must use an absolute path for *host*, starting with a '/'.
+ standard SMTP client. It's common to use Unix sockets for LMTP, so our
+ :meth:`connect` method must support that as well as a regular host:port
+ server. The optional arguments *local_hostname* and *source_address* have the
+ same meaning as they do in the :class:`SMTP` class. To specify a Unix socket, you must use
+ an absolute path for *host*, starting with a '/'.
Authentication is supported, using the regular SMTP mechanism. When using a Unix
socket, LMTP generally don't support or require any authentication, but your
@@ -242,7 +278,7 @@ An :class:`SMTP` instance has the following methods:
No suitable authentication method was found.
-.. method:: SMTP.starttls(keyfile=None, certfile=None)
+.. method:: SMTP.starttls(keyfile=None, certfile=None, context=None)
Put the SMTP connection in TLS (Transport Layer Security) mode. All SMTP
commands that follow will be encrypted. You should then call :meth:`ehlo`
@@ -251,6 +287,9 @@ An :class:`SMTP` instance has the following methods:
If *keyfile* and *certfile* are provided, these are passed to the :mod:`socket`
module's :func:`ssl` function.
+ The optional *context* parameter is a :class:`ssl.SSLContext` object. It is an alternative to
+ using a keyfile and a certfile; if it is specified, both *keyfile* and *certfile* should be ``None``.
+
If there has been no previous ``EHLO`` or ``HELO`` command this session,
this method tries ESMTP ``EHLO`` first.
@@ -263,6 +302,9 @@ An :class:`SMTP` instance has the following methods:
:exc:`RuntimeError`
SSL/TLS support is not available to your Python interpreter.
+ .. versionchanged:: 3.3
+ *context* was added.
+
.. method:: SMTP.sendmail(from_addr, to_addrs, msg, mail_options=[], rcpt_options=[])
@@ -320,7 +362,8 @@ An :class:`SMTP` instance has the following methods:
Unless otherwise noted, the connection will be open even after an exception is
raised.
- .. versionchanged:: 3.2 *msg* may be a byte string.
+ .. versionchanged:: 3.2
+ *msg* may be a byte string.
.. method:: SMTP.send_message(msg, from_addr=None, to_addrs=None, \
diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst
index 344a29f..5737b40 100644
--- a/Doc/library/socket.rst
+++ b/Doc/library/socket.rst
@@ -40,9 +40,23 @@ Socket families
Depending on the system and the build options, various socket families
are supported by this module.
-Socket addresses are represented as follows:
-
-- A single string is used for the :const:`AF_UNIX` address family.
+The address format required by a particular socket object is automatically
+selected based on the address family specified when the socket object was
+created. Socket addresses are represented as follows:
+
+- The address of an :const:`AF_UNIX` socket bound to a file system node
+ is represented as a string, using the file system encoding and the
+ ``'surrogateescape'`` error handler (see :pep:`383`). An address in
+ Linux's abstract namespace is returned as a :class:`bytes` object with
+ an initial null byte; note that sockets in this namespace can
+ communicate with normal file system sockets, so programs intended to
+ run on Linux may need to deal with both types of address. A string or
+ :class:`bytes` object can be used for either type of address when
+ passing it as an argument.
+
+ .. versionchanged:: 3.3
+ Previously, :const:`AF_UNIX` socket paths were assumed to use UTF-8
+ encoding.
- A pair ``(host, port)`` is used for the :const:`AF_INET` address family,
where *host* is a string representing either a hostname in Internet domain
@@ -80,6 +94,19 @@ Socket addresses are represented as follows:
If *addr_type* is :const:`TIPC_ADDR_ID`, then *v1* is the node, *v2* is the
reference, and *v3* should be set to 0.
+- A tuple ``(interface, )`` is used for the :const:`AF_CAN` address family,
+ where *interface* is a string representing a network interface name like
+ ``'can0'``. The network interface name ``''`` can be used to receive packets
+ from all network interfaces of this family.
+
+- A string or a tuple ``(id, unit)`` is used for the :const:`SYSPROTO_CONTROL`
+ protocol of the :const:`PF_SYSTEM` family. The string is the name of a
+ kernel control using a dynamically-assigned ID. The tuple can be used if ID
+ and unit number of the kernel control are known or if a registered ID is
+ used.
+
+ .. versionadded:: 3.3
+
- Certain other address families (:const:`AF_BLUETOOTH`, :const:`AF_PACKET`)
support specific representations.
@@ -99,8 +126,9 @@ resolution and/or the host configuration. For deterministic behavior use a
numeric address in *host* portion.
All errors raise exceptions. The normal exceptions for invalid argument types
-and out-of-memory conditions can be raised; errors related to socket or address
-semantics raise :exc:`socket.error` or one of its subclasses.
+and out-of-memory conditions can be raised; starting from Python 3.3, errors
+related to socket or address semantics raise :exc:`OSError` or one of its
+subclasses (they used to raise :exc:`socket.error`).
Non-blocking mode is supported through :meth:`~socket.setblocking`. A
generalization of this based on timeouts is supported through
@@ -115,20 +143,15 @@ The module :mod:`socket` exports the following constants and functions:
.. exception:: error
- .. index:: module: errno
-
- A subclass of :exc:`IOError`, this exception is raised for socket-related
- errors. It is recommended that you inspect its ``errno`` attribute to
- discriminate between different kinds of errors.
+ A deprecated alias of :exc:`OSError`.
- .. seealso::
- The :mod:`errno` module contains symbolic names for the error codes
- defined by the underlying operating system.
+ .. versionchanged:: 3.3
+ Following :pep:`3151`, this class was made an alias of :exc:`OSError`.
.. exception:: herror
- A subclass of :exc:`socket.error`, this exception is raised for
+ A subclass of :exc:`OSError`, this exception is raised for
address-related errors, i.e. for functions that use *h_errno* in the POSIX
C API, including :func:`gethostbyname_ex` and :func:`gethostbyaddr`.
The accompanying value is a pair ``(h_errno, string)`` representing an
@@ -136,10 +159,12 @@ The module :mod:`socket` exports the following constants and functions:
*string* represents the description of *h_errno*, as returned by the
:c:func:`hstrerror` C function.
+ .. versionchanged:: 3.3
+ This class was made a subclass of :exc:`OSError`.
.. exception:: gaierror
- A subclass of :exc:`socket.error`, this exception is raised for
+ A subclass of :exc:`OSError`, this exception is raised for
address-related errors by :func:`getaddrinfo` and :func:`getnameinfo`.
The accompanying value is a pair ``(error, string)`` representing an error
returned by a library call. *string* represents the description of
@@ -147,15 +172,19 @@ The module :mod:`socket` exports the following constants and functions:
numeric *error* value will match one of the :const:`EAI_\*` constants
defined in this module.
+ .. versionchanged:: 3.3
+ This class was made a subclass of :exc:`OSError`.
.. exception:: timeout
- A subclass of :exc:`socket.error`, this exception is raised when a timeout
+ A subclass of :exc:`OSError`, this exception is raised when a timeout
occurs on a socket which has had timeouts enabled via a prior call to
:meth:`~socket.settimeout` (or implicitly through
:func:`~socket.setdefaulttimeout`). The accompanying value is a string
whose value is currently always "timed out".
+ .. versionchanged:: 3.3
+ This class was made a subclass of :exc:`OSError`.
.. data:: AF_UNIX
AF_INET
@@ -198,6 +227,7 @@ The module :mod:`socket` exports the following constants and functions:
SOMAXCONN
MSG_*
SOL_*
+ SCM_*
IPPROTO_*
IPPORT_*
INADDR_*
@@ -215,6 +245,32 @@ The module :mod:`socket` exports the following constants and functions:
in the Unix header files are defined; for a few symbols, default values are
provided.
+.. data:: AF_CAN
+ PF_CAN
+ SOL_CAN_*
+ CAN_*
+
+ Many constants of these forms, documented in the Linux documentation, are
+ also defined in the socket module.
+
+ Availability: Linux >= 2.6.25.
+
+ .. versionadded:: 3.3
+
+
+.. data:: AF_RDS
+ PF_RDS
+ SOL_RDS
+ RDS_*
+
+ Many constants of these forms, documented in the Linux documentation, are
+ also defined in the socket module.
+
+ Availability: Linux >= 2.6.30.
+
+ .. versionadded:: 3.3
+
+
.. data:: SIO_*
RCVALL_*
@@ -393,10 +449,15 @@ The module :mod:`socket` exports the following constants and functions:
Create a new socket using the given address family, socket type and protocol
number. The address family should be :const:`AF_INET` (the default),
- :const:`AF_INET6` or :const:`AF_UNIX`. The socket type should be
- :const:`SOCK_STREAM` (the default), :const:`SOCK_DGRAM` or perhaps one of the
- other ``SOCK_`` constants. The protocol number is usually zero and may be
- omitted in that case.
+ :const:`AF_INET6`, :const:`AF_UNIX`, :const:`AF_CAN` or :const:`AF_RDS`. The
+ socket type should be :const:`SOCK_STREAM` (the default),
+ :const:`SOCK_DGRAM`, :const:`SOCK_RAW` or perhaps one of the other ``SOCK_``
+ constants. The protocol number is usually zero and may be omitted in that
+ case, or :const:`CAN_RAW` when the address family is :const:`AF_CAN`.
+
+ .. versionchanged:: 3.3
+ The AF_CAN family was added.
+ The AF_RDS family was added.
.. function:: socketpair([family[, type[, proto]]])
@@ -464,7 +525,7 @@ The module :mod:`socket` exports the following constants and functions:
Unix manual page :manpage:`inet(3)` for details.
If the IPv4 address string passed to this function is invalid,
- :exc:`socket.error` will be raised. Note that exactly what is valid depends on
+ :exc:`OSError` will be raised. Note that exactly what is valid depends on
the underlying C implementation of :c:func:`inet_aton`.
:func:`inet_aton` does not support IPv6, and :func:`inet_pton` should be used
@@ -481,7 +542,7 @@ The module :mod:`socket` exports the following constants and functions:
argument.
If the byte sequence passed to this function is not exactly 4 bytes in
- length, :exc:`socket.error` will be raised. :func:`inet_ntoa` does not
+ length, :exc:`OSError` will be raised. :func:`inet_ntoa` does not
support IPv6, and :func:`inet_ntop` should be used instead for IPv4/v6 dual
stack support.
@@ -495,7 +556,7 @@ The module :mod:`socket` exports the following constants and functions:
Supported values for *address_family* are currently :const:`AF_INET` and
:const:`AF_INET6`. If the IP address string *ip_string* is invalid,
- :exc:`socket.error` will be raised. Note that exactly what is valid depends on
+ :exc:`OSError` will be raised. Note that exactly what is valid depends on
both the value of *address_family* and the underlying implementation of
:c:func:`inet_pton`.
@@ -513,11 +574,54 @@ The module :mod:`socket` exports the following constants and functions:
Supported values for *address_family* are currently :const:`AF_INET` and
:const:`AF_INET6`. If the string *packed_ip* is not the correct length for the
specified address family, :exc:`ValueError` will be raised. A
- :exc:`socket.error` is raised for errors from the call to :func:`inet_ntop`.
+ :exc:`OSError` is raised for errors from the call to :func:`inet_ntop`.
Availability: Unix (maybe not all platforms).
+..
+ XXX: Are sendmsg(), recvmsg() and CMSG_*() available on any
+ non-Unix platforms? The old (obsolete?) 4.2BSD form of the
+ interface, in which struct msghdr has no msg_control or
+ msg_controllen members, is not currently supported.
+
+.. function:: CMSG_LEN(length)
+
+ Return the total length, without trailing padding, of an ancillary
+ data item with associated data of the given *length*. This value
+ can often be used as the buffer size for :meth:`~socket.recvmsg` to
+ receive a single item of ancillary data, but :rfc:`3542` requires
+ portable applications to use :func:`CMSG_SPACE` and thus include
+ space for padding, even when the item will be the last in the
+ buffer. Raises :exc:`OverflowError` if *length* is outside the
+ permissible range of values.
+
+ Availability: most Unix platforms, possibly others.
+
+ .. versionadded:: 3.3
+
+
+.. function:: CMSG_SPACE(length)
+
+ Return the buffer size needed for :meth:`~socket.recvmsg` to
+ receive an ancillary data item with associated data of the given
+ *length*, along with any trailing padding. The buffer space needed
+ to receive multiple items is the sum of the :func:`CMSG_SPACE`
+ values for their associated data lengths. Raises
+ :exc:`OverflowError` if *length* is outside the permissible range
+ of values.
+
+ Note that some systems might support ancillary data without
+ providing this function. Also note that setting the buffer size
+ using the results of this function may not precisely limit the
+ amount of ancillary data that can be received, since additional
+ data may be able to fit into the padding area.
+
+ Availability: most Unix platforms, possibly others.
+
+ .. versionadded:: 3.3
+
+
.. function:: getdefaulttimeout()
Return the default timeout in seconds (float) for new socket objects. A value
@@ -533,6 +637,59 @@ The module :mod:`socket` exports the following constants and functions:
meanings.
+.. function:: sethostname(name)
+
+ Set the machine's hostname to *name*. This will raise an
+ :exc:`OSError` if you don't have enough rights.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: if_nameindex()
+
+ Return a list of network interface information
+ (index int, name string) tuples.
+ Raises :exc:`OSError` if the system call fails.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: if_nametoindex(if_name)
+
+ Return a network interface index number corresponding to an
+ interface name.
+ Raises :exc:`OSError` if no interface with the given name exists.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: if_indextoname(if_index)
+
+ Return a network interface name corresponding to an
+ interface index number.
+ Raises :exc:`OSError` if no interface with the given index exists.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: fromshare(data)
+
+ Instantiate a socket from data obtained from :meth:`~socket.share`.
+ The socket is assumed to be in blocking mode.
+
+ Availability: Windows.
+
+ .. versionadded:: 3.3
+
+
.. data:: SocketType
This is a Python type object that represents the socket object type. It is the
@@ -706,6 +863,109 @@ correspond to Unix system calls applicable to sockets.
to zero. (The format of *address* depends on the address family --- see above.)
+.. method:: socket.recvmsg(bufsize[, ancbufsize[, flags]])
+
+ Receive normal data (up to *bufsize* bytes) and ancillary data from
+ the socket. The *ancbufsize* argument sets the size in bytes of
+ the internal buffer used to receive the ancillary data; it defaults
+ to 0, meaning that no ancillary data will be received. Appropriate
+ buffer sizes for ancillary data can be calculated using
+ :func:`CMSG_SPACE` or :func:`CMSG_LEN`, and items which do not fit
+ into the buffer might be truncated or discarded. The *flags*
+ argument defaults to 0 and has the same meaning as for
+ :meth:`recv`.
+
+ The return value is a 4-tuple: ``(data, ancdata, msg_flags,
+ address)``. The *data* item is a :class:`bytes` object holding the
+ non-ancillary data received. The *ancdata* item is a list of zero
+ or more tuples ``(cmsg_level, cmsg_type, cmsg_data)`` representing
+ the ancillary data (control messages) received: *cmsg_level* and
+ *cmsg_type* are integers specifying the protocol level and
+ protocol-specific type respectively, and *cmsg_data* is a
+ :class:`bytes` object holding the associated data. The *msg_flags*
+ item is the bitwise OR of various flags indicating conditions on
+ the received message; see your system documentation for details.
+ If the receiving socket is unconnected, *address* is the address of
+ the sending socket, if available; otherwise, its value is
+ unspecified.
+
+ On some systems, :meth:`sendmsg` and :meth:`recvmsg` can be used to
+ pass file descriptors between processes over an :const:`AF_UNIX`
+ socket. When this facility is used (it is often restricted to
+ :const:`SOCK_STREAM` sockets), :meth:`recvmsg` will return, in its
+ ancillary data, items of the form ``(socket.SOL_SOCKET,
+ socket.SCM_RIGHTS, fds)``, where *fds* is a :class:`bytes` object
+ representing the new file descriptors as a binary array of the
+ native C :c:type:`int` type. If :meth:`recvmsg` raises an
+ exception after the system call returns, it will first attempt to
+ close any file descriptors received via this mechanism.
+
+ Some systems do not indicate the truncated length of ancillary data
+ items which have been only partially received. If an item appears
+ to extend beyond the end of the buffer, :meth:`recvmsg` will issue
+ a :exc:`RuntimeWarning`, and will return the part of it which is
+ inside the buffer provided it has not been truncated before the
+ start of its associated data.
+
+ On systems which support the :const:`SCM_RIGHTS` mechanism, the
+ following function will receive up to *maxfds* file descriptors,
+ returning the message data and a list containing the descriptors
+ (while ignoring unexpected conditions such as unrelated control
+ messages being received). See also :meth:`sendmsg`. ::
+
+ import socket, array
+
+ def recv_fds(sock, msglen, maxfds):
+ fds = array.array("i") # Array of ints
+ msg, ancdata, flags, addr = sock.recvmsg(msglen, socket.CMSG_LEN(maxfds * fds.itemsize))
+ for cmsg_level, cmsg_type, cmsg_data in ancdata:
+ if (cmsg_level == socket.SOL_SOCKET and cmsg_type == socket.SCM_RIGHTS):
+ # Append data, ignoring any truncated integers at the end.
+ fds.fromstring(cmsg_data[:len(cmsg_data) - (len(cmsg_data) % fds.itemsize)])
+ return msg, list(fds)
+
+ Availability: most Unix platforms, possibly others.
+
+ .. versionadded:: 3.3
+
+
+.. method:: socket.recvmsg_into(buffers[, ancbufsize[, flags]])
+
+ Receive normal data and ancillary data from the socket, behaving as
+ :meth:`recvmsg` would, but scatter the non-ancillary data into a
+ series of buffers instead of returning a new bytes object. The
+ *buffers* argument must be an iterable of objects that export
+ writable buffers (e.g. :class:`bytearray` objects); these will be
+ filled with successive chunks of the non-ancillary data until it
+ has all been written or there are no more buffers. The operating
+ system may set a limit (:func:`~os.sysconf` value ``SC_IOV_MAX``)
+ on the number of buffers that can be used. The *ancbufsize* and
+ *flags* arguments have the same meaning as for :meth:`recvmsg`.
+
+ The return value is a 4-tuple: ``(nbytes, ancdata, msg_flags,
+ address)``, where *nbytes* is the total number of bytes of
+ non-ancillary data written into the buffers, and *ancdata*,
+ *msg_flags* and *address* are the same as for :meth:`recvmsg`.
+
+ Example::
+
+ >>> import socket
+ >>> s1, s2 = socket.socketpair()
+ >>> b1 = bytearray(b'----')
+ >>> b2 = bytearray(b'0123456789')
+ >>> b3 = bytearray(b'--------------')
+ >>> s1.send(b'Mary had a little lamb')
+ 22
+ >>> s2.recvmsg_into([b1, memoryview(b2)[2:9], b3])
+ (22, [], 0, None)
+ >>> [b1, b2, b3]
+ [bytearray(b'Mary'), bytearray(b'01 had a 9'), bytearray(b'little lamb---')]
+
+ Availability: most Unix platforms, possibly others.
+
+ .. versionadded:: 3.3
+
+
.. method:: socket.recvfrom_into(buffer[, nbytes[, flags]])
Receive data from the socket, writing it into *buffer* instead of creating a
@@ -755,6 +1015,41 @@ correspond to Unix system calls applicable to sockets.
above.)
+.. method:: socket.sendmsg(buffers[, ancdata[, flags[, address]]])
+
+ Send normal and ancillary data to the socket, gathering the
+ non-ancillary data from a series of buffers and concatenating it
+ into a single message. The *buffers* argument specifies the
+ non-ancillary data as an iterable of buffer-compatible objects
+ (e.g. :class:`bytes` objects); the operating system may set a limit
+ (:func:`~os.sysconf` value ``SC_IOV_MAX``) on the number of buffers
+ that can be used. The *ancdata* argument specifies the ancillary
+ data (control messages) as an iterable of zero or more tuples
+ ``(cmsg_level, cmsg_type, cmsg_data)``, where *cmsg_level* and
+ *cmsg_type* are integers specifying the protocol level and
+ protocol-specific type respectively, and *cmsg_data* is a
+ buffer-compatible object holding the associated data. Note that
+ some systems (in particular, systems without :func:`CMSG_SPACE`)
+ might support sending only one control message per call. The
+ *flags* argument defaults to 0 and has the same meaning as for
+ :meth:`send`. If *address* is supplied and not ``None``, it sets a
+ destination address for the message. The return value is the
+ number of bytes of non-ancillary data sent.
+
+ The following function sends the list of file descriptors *fds*
+ over an :const:`AF_UNIX` socket, on systems which support the
+ :const:`SCM_RIGHTS` mechanism. See also :meth:`recvmsg`. ::
+
+ import socket, array
+
+ def send_fds(sock, msg, fds):
+ return sock.sendmsg([msg], [(socket.SOL_SOCKET, socket.SCM_RIGHTS, array.array("i", fds))])
+
+ Availability: most Unix platforms, possibly others.
+
+ .. versionadded:: 3.3
+
+
.. method:: socket.setblocking(flag)
Set blocking or non-blocking mode of the socket: if *flag* is false, the
@@ -796,9 +1091,22 @@ correspond to Unix system calls applicable to sockets.
Shut down one or both halves of the connection. If *how* is :const:`SHUT_RD`,
further receives are disallowed. If *how* is :const:`SHUT_WR`, further sends
are disallowed. If *how* is :const:`SHUT_RDWR`, further sends and receives are
- disallowed. Depending on the platform, shutting down one half of the connection
- can also close the opposite half (e.g. on Mac OS X, ``shutdown(SHUT_WR)`` does
- not allow further reads on the other end of the connection).
+ disallowed.
+
+
+.. method:: socket.share(process_id)
+
+ :platform: Windows
+
+ Duplicate a socket and prepare it for sharing with a target process. The
+ target process must be provided with *process_id*. The resulting bytes object
+ can then be passed to the target process using some form of interprocess
+ communication and the socket can be recreated there using :func:`fromshare`.
+ Once this method has been called, it is safe to close the socket since
+ the operating system has already duplicated it for the target process.
+
+ .. versionadded:: 3.3
+
Note that there are no methods :meth:`read` or :meth:`write`; use
:meth:`~socket.recv` and :meth:`~socket.send` without *flags* argument instead.
@@ -943,13 +1251,13 @@ sends traffic to the first one connected successfully. ::
af, socktype, proto, canonname, sa = res
try:
s = socket.socket(af, socktype, proto)
- except socket.error as msg:
+ except OSError as msg:
s = None
continue
try:
s.bind(sa)
s.listen(1)
- except socket.error as msg:
+ except OSError as msg:
s.close()
s = None
continue
@@ -978,12 +1286,12 @@ sends traffic to the first one connected successfully. ::
af, socktype, proto, canonname, sa = res
try:
s = socket.socket(af, socktype, proto)
- except socket.error as msg:
+ except OSError as msg:
s = None
continue
try:
s.connect(sa)
- except socket.error as msg:
+ except OSError as msg:
s.close()
s = None
continue
@@ -997,7 +1305,7 @@ sends traffic to the first one connected successfully. ::
print('Received', repr(data))
-The last example shows how to write a very simple network sniffer with raw
+The next example shows how to write a very simple network sniffer with raw
sockets on Windows. The example requires administrator privileges to modify
the interface::
@@ -1022,11 +1330,51 @@ the interface::
# disabled promiscuous mode
s.ioctl(socket.SIO_RCVALL, socket.RCVALL_OFF)
+The last example shows how to use the socket interface to communicate to a CAN
+network. This example might require special privileges::
+
+ import socket
+ import struct
+
+
+ # CAN frame packing/unpacking (see 'struct can_frame' in <linux/can.h>)
+
+ can_frame_fmt = "=IB3x8s"
+ can_frame_size = struct.calcsize(can_frame_fmt)
+
+ def build_can_frame(can_id, data):
+ can_dlc = len(data)
+ data = data.ljust(8, b'\x00')
+ return struct.pack(can_frame_fmt, can_id, can_dlc, data)
+
+ def dissect_can_frame(frame):
+ can_id, can_dlc, data = struct.unpack(can_frame_fmt, frame)
+ return (can_id, can_dlc, data[:can_dlc])
+
+
+ # create a raw socket and bind it to the 'vcan0' interface
+ s = socket.socket(socket.AF_CAN, socket.SOCK_RAW, socket.CAN_RAW)
+ s.bind(('vcan0',))
+
+ while True:
+ cf, addr = s.recvfrom(can_frame_size)
+
+ print('Received: can_id=%x, can_dlc=%x, data=%s' % dissect_can_frame(cf))
+
+ try:
+ s.send(cf)
+ except OSError:
+ print('Error sending CAN frame')
+
+ try:
+ s.send(build_can_frame(0x01, b'\x01\x02\x03'))
+ except OSError:
+ print('Error sending CAN frame')
Running an example several times with too small delay between executions, could
lead to this error::
- socket.error: [Errno 98] Address already in use
+ OSError: [Errno 98] Address already in use
This is because the previous execution has left the socket in a ``TIME_WAIT``
state, and can't be immediately reused.
diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst
index 5287f17..28e8a0a 100644
--- a/Doc/library/socketserver.rst
+++ b/Doc/library/socketserver.rst
@@ -153,10 +153,24 @@ Server Objects
.. method:: BaseServer.serve_forever(poll_interval=0.5)
- Handle requests until an explicit :meth:`shutdown` request.
- Poll for shutdown every *poll_interval* seconds. Ignores :attr:`self.timeout`.
- If you need to do periodic tasks, do them in another thread.
+ Handle requests until an explicit :meth:`shutdown` request. Poll for
+ shutdown every *poll_interval* seconds. Ignores :attr:`self.timeout`. It
+ also calls :meth:`service_actions`, which may be used by a subclass or mixin
+ to provide actions specific to a given service. For example, the
+ :class:`ForkingMixIn` class uses :meth:`service_actions` to clean up zombie
+ child processes.
+ .. versionchanged:: 3.3
+ Added ``service_actions`` call to the ``serve_forever`` method.
+
+
+.. method:: BaseServer.service_actions()
+
+ This is called in the :meth:`serve_forever` loop. This method can be
+ overridden by subclasses or mixin classes to perform actions specific to
+ a given service, such as cleanup actions.
+
+ .. versionadded:: 3.3
.. method:: BaseServer.shutdown()
diff --git a/Doc/library/someos.rst b/Doc/library/someos.rst
deleted file mode 100644
index d2009bb..0000000
--- a/Doc/library/someos.rst
+++ /dev/null
@@ -1,24 +0,0 @@
-.. _someos:
-
-**********************************
-Optional Operating System Services
-**********************************
-
-The modules described in this chapter provide interfaces to operating system
-features that are available on selected operating systems only. The interfaces
-are generally modeled after the Unix or C interfaces but they are available on
-some other systems as well (e.g. Windows). Here's an overview:
-
-
-.. toctree::
-
- select.rst
- threading.rst
- multiprocessing.rst
- concurrent.futures.rst
- mmap.rst
- readline.rst
- rlcompleter.rst
- dummy_threading.rst
- _thread.rst
- _dummy_thread.rst
diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst
index 93f6d82..7d156def 100644
--- a/Doc/library/sqlite3.rst
+++ b/Doc/library/sqlite3.rst
@@ -369,6 +369,22 @@ Connection Objects
method with :const:`None` for *handler*.
+ .. method:: set_trace_callback(trace_callback)
+
+ Registers *trace_callback* to be called for each SQL statement that is
+ actually executed by the SQLite backend.
+
+ The only argument passed to the callback is the statement (as string) that
+ is being executed. The return value of the callback is ignored. Note that
+ the backend does not only run statements passed to the :meth:`Cursor.execute`
+ methods. Other sources include the transaction management of the Python
+ module and the execution of triggers defined in the current database.
+
+ Passing :const:`None` as *trace_callback* will disable the trace callback.
+
+ .. versionadded:: 3.3
+
+
.. method:: enable_load_extension(enabled)
This routine allows/disallows the SQLite engine to load SQLite extensions
diff --git a/Doc/library/ssl.rst b/Doc/library/ssl.rst
index 0f5cea2..77196e1 100644
--- a/Doc/library/ssl.rst
+++ b/Doc/library/ssl.rst
@@ -53,9 +53,69 @@ Functions, Constants, and Exceptions
(currently provided by the OpenSSL library). This signifies some
problem in the higher-level encryption and authentication layer that's
superimposed on the underlying network connection. This error
- is a subtype of :exc:`socket.error`, which in turn is a subtype of
- :exc:`IOError`. The error code and message of :exc:`SSLError` instances
- are provided by the OpenSSL library.
+ is a subtype of :exc:`OSError`. The error code and message of
+ :exc:`SSLError` instances are provided by the OpenSSL library.
+
+ .. versionchanged:: 3.3
+ :exc:`SSLError` used to be a subtype of :exc:`socket.error`.
+
+ .. attribute:: library
+
+ A string mnemonic designating the OpenSSL submodule in which the error
+ occurred, such as ``SSL``, ``PEM`` or ``X509``. The range of possible
+ values depends on the OpenSSL version.
+
+ .. versionadded:: 3.3
+
+ .. attribute:: reason
+
+ A string mnemonic designating the reason this error occurred, for
+ example ``CERTIFICATE_VERIFY_FAILED``. The range of possible
+ values depends on the OpenSSL version.
+
+ .. versionadded:: 3.3
+
+.. exception:: SSLZeroReturnError
+
+ A subclass of :exc:`SSLError` raised when trying to read or write and
+ the SSL connection has been closed cleanly. Note that this doesn't
+ mean that the underlying transport (read TCP) has been closed.
+
+ .. versionadded:: 3.3
+
+.. exception:: SSLWantReadError
+
+ A subclass of :exc:`SSLError` raised by a :ref:`non-blocking SSL socket
+ <ssl-nonblocking>` when trying to read or write data, but more data needs
+ to be received on the underlying TCP transport before the request can be
+ fulfilled.
+
+ .. versionadded:: 3.3
+
+.. exception:: SSLWantWriteError
+
+ A subclass of :exc:`SSLError` raised by a :ref:`non-blocking SSL socket
+ <ssl-nonblocking>` when trying to read or write data, but more data needs
+ to be sent on the underlying TCP transport before the request can be
+ fulfilled.
+
+ .. versionadded:: 3.3
+
+.. exception:: SSLSyscallError
+
+ A subclass of :exc:`SSLError` raised when a system error was encountered
+ while trying to fulfill an operation on an SSL socket. Unfortunately,
+ there is no easy way to inspect the original errno number.
+
+ .. versionadded:: 3.3
+
+.. exception:: SSLEOFError
+
+ A subclass of :exc:`SSLError` raised when the SSL connection has been
+ terminated abruptly. Generally, you shouldn't try to reuse the underlying
+ transport when this error is encountered.
+
+ .. versionadded:: 3.3
.. exception:: CertificateError
@@ -161,6 +221,35 @@ instead.
Random generation
^^^^^^^^^^^^^^^^^
+.. function:: RAND_bytes(num)
+
+ Returns *num* cryptographically strong pseudo-random bytes. Raises an
+ :class:`SSLError` if the PRNG has not been seeded with enough data or if the
+ operation is not supported by the current RAND method. :func:`RAND_status`
+ can be used to check the status of the PRNG and :func:`RAND_add` can be used
+ to seed the PRNG.
+
+ Read the Wikipedia article, `Cryptographically secure pseudorandom number
+ generator (CSPRNG)
+ <http://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator>`_,
+ to get the requirements of a cryptographically strong generator.
+
+ .. versionadded:: 3.3
+
+.. function:: RAND_pseudo_bytes(num)
+
+ Returns (bytes, is_cryptographic): bytes are *num* pseudo-random bytes,
+ is_cryptographic is True if the bytes generated are cryptographically
+ strong. Raises an :class:`SSLError` if the operation is not supported by the
+ current RAND method.
+
+ Generated pseudo-random byte sequences will be unique if they are of
+ sufficient length, but are not necessarily unpredictable. They can be used
+ for non-cryptographic purposes and for certain purposes in cryptographic
+ protocols, but usually not for key generation etc.
+
+ .. versionadded:: 3.3
+
.. function:: RAND_status()
Returns True if the SSL pseudo-random number generator has been seeded with
@@ -170,7 +259,7 @@ Random generation
.. function:: RAND_egd(path)
- If you are running an entropy-gathering daemon (EGD) somewhere, and ``path``
+ If you are running an entropy-gathering daemon (EGD) somewhere, and *path*
is the pathname of a socket connection open to it, this will read 256 bytes
of randomness from the socket, and add it to the SSL pseudo-random number
generator to increase the security of generated secret keys. This is
@@ -181,8 +270,8 @@ Random generation
.. function:: RAND_add(bytes, entropy)
- Mixes the given ``bytes`` into the SSL pseudo-random number generator. The
- parameter ``entropy`` (a float) is a lower bound on the entropy contained in
+ Mixes the given *bytes* into the SSL pseudo-random number generator. The
+ parameter *entropy* (a float) is a lower bound on the entropy contained in
string (so you can always use :const:`0.0`). See :rfc:`1750` for more
information on sources of entropy.
@@ -238,6 +327,9 @@ Certificate handling
will attempt to validate the server certificate against that set of root
certificates, and will fail if the validation attempt fails.
+ .. versionchanged:: 3.3
+ This function is now IPv6-compatible.
+
.. function:: DER_cert_to_PEM_cert(DER_cert_bytes)
Given a certificate as a DER-encoded blob of bytes, returns a PEM-encoded
@@ -345,6 +437,46 @@ Constants
.. versionadded:: 3.2
+.. data:: OP_CIPHER_SERVER_PREFERENCE
+
+ Use the server's cipher ordering preference, rather than the client's.
+ This option has no effect on client sockets and SSLv2 server sockets.
+
+ .. versionadded:: 3.3
+
+.. data:: OP_SINGLE_DH_USE
+
+ Prevents re-use of the same DH key for distinct SSL sessions. This
+ improves forward secrecy but requires more computational resources.
+ This option only applies to server sockets.
+
+ .. versionadded:: 3.3
+
+.. data:: OP_SINGLE_ECDH_USE
+
+ Prevents re-use of the same ECDH key for distinct SSL sessions. This
+ improves forward secrecy but requires more computational resources.
+ This option only applies to server sockets.
+
+ .. versionadded:: 3.3
+
+.. data:: OP_NO_COMPRESSION
+
+ Disable compression on the SSL channel. This is useful if the application
+ protocol supports its own compression scheme.
+
+ This option is only available with OpenSSL 1.0.0 and later.
+
+ .. versionadded:: 3.3
+
+.. data:: HAS_ECDH
+
+ Whether the OpenSSL library has built-in support for Elliptic Curve-based
+ Diffie-Hellman key exchange. This should be true unless the feature was
+ explicitly disabled by the distributor.
+
+ .. versionadded:: 3.3
+
.. data:: HAS_SNI
Whether the OpenSSL library has built-in support for the *Server Name
@@ -354,6 +486,23 @@ Constants
.. versionadded:: 3.2
+.. data:: HAS_NPN
+
+ Whether the OpenSSL library has built-in support for *Next Protocol
+ Negotiation* as described in the `NPN draft specification
+ <http://tools.ietf.org/html/draft-agl-tls-nextprotoneg>`_. When true,
+ you can use the :meth:`SSLContext.set_npn_protocols` method to advertise
+ which protocols you want to support.
+
+ .. versionadded:: 3.3
+
+.. data:: CHANNEL_BINDING_TYPES
+
+ List of supported TLS channel binding types. Strings in this list
+ can be used as arguments to :meth:`SSLSocket.get_channel_binding`.
+
+ .. versionadded:: 3.3
+
.. data:: OPENSSL_VERSION
The version string of the OpenSSL library loaded by the interpreter::
@@ -479,6 +628,37 @@ SSL sockets also have the following additional methods and attributes:
version of the SSL protocol that defines its use, and the number of secret
bits being used. If no connection has been established, returns ``None``.
+.. method:: SSLSocket.compression()
+
+ Return the compression algorithm being used as a string, or ``None``
+ if the connection isn't compressed.
+
+ If the higher-level protocol supports its own compression mechanism,
+ you can use :data:`OP_NO_COMPRESSION` to disable SSL-level compression.
+
+ .. versionadded:: 3.3
+
+.. method:: SSLSocket.get_channel_binding(cb_type="tls-unique")
+
+ Get channel binding data for current connection, as a bytes object. Returns
+ ``None`` if not connected or the handshake has not been completed.
+
+ The *cb_type* parameter allows selection of the desired channel binding
+ type. Valid channel binding types are listed in the
+ :data:`CHANNEL_BINDING_TYPES` list. Currently only the 'tls-unique' channel
+ binding, defined by :rfc:`5929`, is supported. :exc:`ValueError` will be
+ raised if an unsupported channel binding type is requested.
+
+ .. versionadded:: 3.3
+
+.. method:: SSLSocket.selected_npn_protocol()
+
+ Returns the protocol that was selected during the TLS/SSL handshake. If
+ :meth:`SSLContext.set_npn_protocols` was not called, or if the other party
+ does not support NPN, or if the handshake has not yet happened, this will
+ return ``None``.
+
+ .. versionadded:: 3.3
.. method:: SSLSocket.unwrap()
@@ -488,7 +668,6 @@ SSL sockets also have the following additional methods and attributes:
returned socket should always be used for further communication with the
other side of the connection, rather than the original socket.
-
.. attribute:: SSLSocket.context
The :class:`SSLContext` object this SSL socket is tied to. If the SSL
@@ -518,7 +697,7 @@ to speed up repeated connections from the same clients.
:class:`SSLContext` objects have the following methods and attributes:
-.. method:: SSLContext.load_cert_chain(certfile, keyfile=None)
+.. method:: SSLContext.load_cert_chain(certfile, keyfile=None, password=None)
Load a private key and the corresponding certificate. The *certfile*
string must be the path to a single file in PEM format containing the
@@ -529,9 +708,25 @@ to speed up repeated connections from the same clients.
:ref:`ssl-certificates` for more information on how the certificate
is stored in the *certfile*.
+ The *password* argument may be a function to call to get the password for
+ decrypting the private key. It will only be called if the private key is
+ encrypted and a password is necessary. It will be called with no arguments,
+ and it should return a string, bytes, or bytearray. If the return value is
+ a string it will be encoded as UTF-8 before using it to decrypt the key.
+ Alternatively a string, bytes, or bytearray value may be supplied directly
+ as the *password* argument. It will be ignored if the private key is not
+ encrypted and no password is needed.
+
+ If the *password* argument is not specified and a password is required,
+ OpenSSL's built-in password prompting mechanism will be used to
+ interactively prompt the user for a password.
+
An :class:`SSLError` is raised if the private key doesn't
match with the certificate.
+ .. versionchanged:: 3.3
+ New optional argument *password*.
+
.. method:: SSLContext.load_verify_locations(cafile=None, capath=None)
Load a set of "certification authority" (CA) certificates used to validate
@@ -570,6 +765,53 @@ to speed up repeated connections from the same clients.
when connected, the :meth:`SSLSocket.cipher` method of SSL sockets will
give the currently selected cipher.
+.. method:: SSLContext.set_npn_protocols(protocols)
+
+ Specify which protocols the socket should advertise during the SSL/TLS
+ handshake. It should be a list of strings, like ``['http/1.1', 'spdy/2']``,
+ ordered by preference. The selection of a protocol will happen during the
+ handshake, and will play out according to the `NPN draft specification
+ <http://tools.ietf.org/html/draft-agl-tls-nextprotoneg>`_. After a
+ successful handshake, the :meth:`SSLSocket.selected_npn_protocol` method will
+ return the agreed-upon protocol.
+
+ This method will raise :exc:`NotImplementedError` if :data:`HAS_NPN` is
+ False.
+
+ .. versionadded:: 3.3
+
+.. method:: SSLContext.load_dh_params(dhfile)
+
+ Load the key generation parameters for Diffie-Hellman (DH) key exchange.
+ Using DH key exchange improves forward secrecy at the expense of
+ computational resources (both on the server and on the client).
+ The *dhfile* parameter should be the path to a file containing DH
+ parameters in PEM format.
+
+ This setting doesn't apply to client sockets. You can also use the
+ :data:`OP_SINGLE_DH_USE` option to further improve security.
+
+ .. versionadded:: 3.3
+
+.. method:: SSLContext.set_ecdh_curve(curve_name)
+
+ Set the curve name for Elliptic Curve-based Diffie-Hellman (ECDH) key
+ exchange. ECDH is significantly faster than regular DH while arguably
+ as secure. The *curve_name* parameter should be a string describing
+ a well-known elliptic curve, for example ``prime256v1`` for a widely
+ supported curve.
+
+ This setting doesn't apply to client sockets. You can also use the
+ :data:`OP_SINGLE_ECDH_USE` option to further improve security.
+
+ This method is not available if :data:`HAS_ECDH` is False.
+
+ .. versionadded:: 3.3
+
+ .. seealso::
+ `SSL/TLS & Perfect Forward Secrecy <http://vincent.bernat.im/en/blog/2011-ssl-perfect-forward-secrecy.html>`_
+ Vincent Bernat.
+
.. method:: SSLContext.wrap_socket(sock, server_side=False, \
do_handshake_on_connect=True, suppress_ragged_eofs=True, \
server_hostname=None)
@@ -984,13 +1226,10 @@ to be aware of:
try:
sock.do_handshake()
break
- except ssl.SSLError as err:
- if err.args[0] == ssl.SSL_ERROR_WANT_READ:
- select.select([sock], [], [])
- elif err.args[0] == ssl.SSL_ERROR_WANT_WRITE:
- select.select([], [sock], [])
- else:
- raise
+ except ssl.SSLWantReadError:
+ select.select([sock], [], [])
+ except ssl.SSLWantWriteError:
+ select.select([], [sock], [])
.. _ssl-security:
diff --git a/Doc/library/stat.rst b/Doc/library/stat.rst
index 7de98b6..f47f464 100644
--- a/Doc/library/stat.rst
+++ b/Doc/library/stat.rst
@@ -104,6 +104,16 @@ Example::
if __name__ == '__main__':
walktree(sys.argv[1], visitfile)
+An additional utility function is provided to convert a file's mode to a
+human-readable string:
+
+.. function:: filemode(mode)
+
+ Convert a file's mode to a string of the form '-rwxrwxrwx'.
+
+ .. versionadded:: 3.3
+
+
All the variables below are simply symbolic indexes into the 10-tuple returned
by :func:`os.stat`, :func:`os.fstat` or :func:`os.lstat`.
@@ -344,4 +354,3 @@ The following flags can be used in the *flags* argument of :func:`os.chflags`:
The file is a snapshot file.
See the \*BSD or Mac OS systems man page :manpage:`chflags(2)` for more information.
-
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index 20174c5..1532dbb 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -15,6 +15,10 @@ interpreter.
The principal built-in types are numerics, sequences, mappings, classes,
instances and exceptions.
+Some collection classes are mutable. The methods that add, subtract, or
+rearrange their members in place, and don't return a specific item, never return
+the collection instance itself but ``None``.
+
Some operations are supported by several object types; in particular,
practically all objects can be compared, tested for truth value, and converted
to a string (with the :func:`repr` function or the slightly different
@@ -641,34 +645,34 @@ made available to Python as the :attr:`modulus` attribute of
Here are the rules in detail:
- - If ``x = m / n`` is a nonnegative rational number and ``n`` is not divisible
- by ``P``, define ``hash(x)`` as ``m * invmod(n, P) % P``, where ``invmod(n,
- P)`` gives the inverse of ``n`` modulo ``P``.
+- If ``x = m / n`` is a nonnegative rational number and ``n`` is not divisible
+ by ``P``, define ``hash(x)`` as ``m * invmod(n, P) % P``, where ``invmod(n,
+ P)`` gives the inverse of ``n`` modulo ``P``.
- - If ``x = m / n`` is a nonnegative rational number and ``n`` is
- divisible by ``P`` (but ``m`` is not) then ``n`` has no inverse
- modulo ``P`` and the rule above doesn't apply; in this case define
- ``hash(x)`` to be the constant value ``sys.hash_info.inf``.
+- If ``x = m / n`` is a nonnegative rational number and ``n`` is
+ divisible by ``P`` (but ``m`` is not) then ``n`` has no inverse
+ modulo ``P`` and the rule above doesn't apply; in this case define
+ ``hash(x)`` to be the constant value ``sys.hash_info.inf``.
- - If ``x = m / n`` is a negative rational number define ``hash(x)``
- as ``-hash(-x)``. If the resulting hash is ``-1``, replace it with
- ``-2``.
+- If ``x = m / n`` is a negative rational number define ``hash(x)``
+ as ``-hash(-x)``. If the resulting hash is ``-1``, replace it with
+ ``-2``.
- - The particular values ``sys.hash_info.inf``, ``-sys.hash_info.inf``
- and ``sys.hash_info.nan`` are used as hash values for positive
- infinity, negative infinity, or nans (respectively). (All hashable
- nans have the same hash value.)
+- The particular values ``sys.hash_info.inf``, ``-sys.hash_info.inf``
+ and ``sys.hash_info.nan`` are used as hash values for positive
+ infinity, negative infinity, or nans (respectively). (All hashable
+ nans have the same hash value.)
- - For a :class:`complex` number ``z``, the hash values of the real
- and imaginary parts are combined by computing ``hash(z.real) +
- sys.hash_info.imag * hash(z.imag)``, reduced modulo
- ``2**sys.hash_info.width`` so that it lies in
- ``range(-2**(sys.hash_info.width - 1), 2**(sys.hash_info.width -
- 1))``. Again, if the result is ``-1``, it's replaced with ``-2``.
+- For a :class:`complex` number ``z``, the hash values of the real
+ and imaginary parts are combined by computing ``hash(z.real) +
+ sys.hash_info.imag * hash(z.imag)``, reduced modulo
+ ``2**sys.hash_info.width`` so that it lies in
+ ``range(-2**(sys.hash_info.width - 1), 2**(sys.hash_info.width -
+ 1))``. Again, if the result is ``-1``, it's replaced with ``-2``.
To clarify the above rules, here's some example Python code,
-equivalent to the builtin hash, for computing the hash of a rational
+equivalent to the built-in hash, for computing the hash of a rational
number, :class:`float`, or :class:`complex`::
@@ -796,110 +800,35 @@ the yield expression <yieldexpr>`.
.. _typesseq:
-Sequence Types --- :class:`str`, :class:`bytes`, :class:`bytearray`, :class:`list`, :class:`tuple`, :class:`range`
-==================================================================================================================
+Sequence Types --- :class:`list`, :class:`tuple`, :class:`range`
+================================================================
-There are six sequence types: strings, byte sequences (:class:`bytes` objects),
-byte arrays (:class:`bytearray` objects), lists, tuples, and range objects. For
-other containers see the built in :class:`dict` and :class:`set` classes, and
-the :mod:`collections` module.
+There are three basic sequence types: lists, tuples, and range objects.
+Additional sequence types tailored for processing of
+:ref:`binary data <binaryseq>` and :ref:`text strings <textseq>` are
+described in dedicated sections.
-.. index::
- object: sequence
- object: string
- object: bytes
- object: bytearray
- object: tuple
- object: list
- object: range
-
-Strings contain Unicode characters. Their literals are written in single or
-double quotes: ``'xyzzy'``, ``"frobozz"``. See :ref:`strings` for more about
-string literals. In addition to the functionality described here, there are
-also string-specific methods described in the :ref:`string-methods` section.
-
-Bytes and bytearray objects contain single bytes -- the former is immutable
-while the latter is a mutable sequence.
-Bytes objects can be constructed by using the
-constructor, :func:`bytes`, and from literals; use a ``b`` prefix with normal
-string syntax: ``b'xyzzy'``. To construct byte arrays, use the
-:func:`bytearray` function.
-
-While string objects are sequences of characters (represented by strings of
-length 1), bytes and bytearray objects are sequences of *integers* (between 0
-and 255), representing the ASCII value of single bytes. That means that for
-a bytes or bytearray object *b*, ``b[0]`` will be an integer, while
-``b[0:1]`` will be a bytes or bytearray object of length 1. The
-representation of bytes objects uses the literal format (``b'...'``) since it
-is generally more useful than e.g. ``bytes([50, 19, 100])``. You can always
-convert a bytes object into a list of integers using ``list(b)``.
-
-Also, while in previous Python versions, byte strings and Unicode strings
-could be exchanged for each other rather freely (barring encoding issues),
-strings and bytes are now completely separate concepts. There's no implicit
-en-/decoding if you pass an object of the wrong type. A string always
-compares unequal to a bytes or bytearray object.
-
-Lists are constructed with square brackets, separating items with commas: ``[a,
-b, c]``. Tuples are constructed by the comma operator (not within square
-brackets), with or without enclosing parentheses, but an empty tuple must have
-the enclosing parentheses, such as ``a, b, c`` or ``()``. A single item tuple
-must have a trailing comma, such as ``(d,)``.
-
-Objects of type range are created using the :func:`range` function. They don't
-support concatenation or repetition, and using :func:`min` or :func:`max` on
-them is inefficient.
-
-Most sequence types support the following operations. The ``in`` and ``not in``
-operations have the same priorities as the comparison operations. The ``+`` and
-``*`` operations have the same priority as the corresponding numeric operations.
-[3]_ Additional methods are provided for :ref:`typesseq-mutable`.
+.. _typesseq-common:
+
+Common Sequence Operations
+--------------------------
+
+.. index:: object: sequence
+
+The operations in the following table are supported by most sequence types,
+both mutable and immutable. The :class:`collections.abc.Sequence` ABC is
+provided to make it easier to correctly implement these operations on
+custom sequence types.
This table lists the sequence operations sorted in ascending priority
(operations in the same box have the same priority). In the table, *s* and *t*
-are sequences of the same type; *n*, *i*, *j* and *k* are integers.
-
-+------------------+--------------------------------+----------+
-| Operation | Result | Notes |
-+==================+================================+==========+
-| ``x in s`` | ``True`` if an item of *s* is | \(1) |
-| | equal to *x*, else ``False`` | |
-+------------------+--------------------------------+----------+
-| ``x not in s`` | ``False`` if an item of *s* is | \(1) |
-| | equal to *x*, else ``True`` | |
-+------------------+--------------------------------+----------+
-| ``s + t`` | the concatenation of *s* and | \(6) |
-| | *t* | |
-+------------------+--------------------------------+----------+
-| ``s * n, n * s`` | *n* shallow copies of *s* | \(2) |
-| | concatenated | |
-+------------------+--------------------------------+----------+
-| ``s[i]`` | *i*\ th item of *s*, origin 0 | \(3) |
-+------------------+--------------------------------+----------+
-| ``s[i:j]`` | slice of *s* from *i* to *j* | (3)(4) |
-+------------------+--------------------------------+----------+
-| ``s[i:j:k]`` | slice of *s* from *i* to *j* | (3)(5) |
-| | with step *k* | |
-+------------------+--------------------------------+----------+
-| ``len(s)`` | length of *s* | |
-+------------------+--------------------------------+----------+
-| ``min(s)`` | smallest item of *s* | |
-+------------------+--------------------------------+----------+
-| ``max(s)`` | largest item of *s* | |
-+------------------+--------------------------------+----------+
-| ``s.index(i)`` | index of the first occurence | |
-| | of *i* in *s* | |
-+------------------+--------------------------------+----------+
-| ``s.count(i)`` | total number of occurences of | |
-| | *i* in *s* | |
-+------------------+--------------------------------+----------+
-
-Sequence types also support comparisons. In particular, tuples and lists are
-compared lexicographically by comparing corresponding elements. This means that
-to compare equal, every element must compare equal and the two sequences must be
-of the same type and have the same length. (For full details see
-:ref:`comparisons` in the language reference.)
+are sequences of the same type, *n*, *i*, *j* and *k* are integers and *x* is
+an arbitrary object that meets any type and value restrictions imposed by *s*.
+
+The ``in`` and ``not in`` operations have the same priorities as the
+comparison operations. The ``+`` (concatenation) and ``*`` (repetition)
+operations have the same priority as the corresponding numeric operations.
.. index::
triple: operations on; sequence; types
@@ -912,18 +841,67 @@ of the same type and have the same length. (For full details see
pair: slice; operation
operator: in
operator: not in
+ single: count() (sequence method)
+ single: index() (sequence method)
+
++--------------------------+--------------------------------+----------+
+| Operation | Result | Notes |
++==========================+================================+==========+
+| ``x in s`` | ``True`` if an item of *s* is | \(1) |
+| | equal to *x*, else ``False`` | |
++--------------------------+--------------------------------+----------+
+| ``x not in s`` | ``False`` if an item of *s* is | \(1) |
+| | equal to *x*, else ``True`` | |
++--------------------------+--------------------------------+----------+
+| ``s + t`` | the concatenation of *s* and | (6)(7) |
+| | *t* | |
++--------------------------+--------------------------------+----------+
+| ``s * n`` or | *n* shallow copies of *s* | (2)(7) |
+| ``n * s`` | concatenated | |
++--------------------------+--------------------------------+----------+
+| ``s[i]`` | *i*\ th item of *s*, origin 0 | \(3) |
++--------------------------+--------------------------------+----------+
+| ``s[i:j]`` | slice of *s* from *i* to *j* | (3)(4) |
++--------------------------+--------------------------------+----------+
+| ``s[i:j:k]`` | slice of *s* from *i* to *j* | (3)(5) |
+| | with step *k* | |
++--------------------------+--------------------------------+----------+
+| ``len(s)`` | length of *s* | |
++--------------------------+--------------------------------+----------+
+| ``min(s)`` | smallest item of *s* | |
++--------------------------+--------------------------------+----------+
+| ``max(s)`` | largest item of *s* | |
++--------------------------+--------------------------------+----------+
+| ``s.index(x[, i[, j]])`` | index of the first occurrence | \(8) |
+| | of *x* in *s* (at or after | |
+| | index *i* and before index *j*)| |
++--------------------------+--------------------------------+----------+
+| ``s.count(x)`` | total number of occurrences of | |
+| | *x* in *s* | |
++--------------------------+--------------------------------+----------+
+
+Sequences of the same type also support comparisons. In particular, tuples
+and lists are compared lexicographically by comparing corresponding elements.
+This means that to compare equal, every element must compare equal and the
+two sequences must be of the same type and have the same length. (For full
+details see :ref:`comparisons` in the language reference.)
Notes:
(1)
- When *s* is a string object, the ``in`` and ``not in`` operations act like a
- substring test.
+ While the ``in`` and ``not in`` operations are used only for simple
+ containment testing in the general case, some specialised sequences
+ (such as :class:`str`, :class:`bytes` and :class:`bytearray`) also use
+ them for subsequence testing::
+
+ >>> "gg" in "eggs"
+ True
(2)
Values of *n* less than ``0`` are treated as ``0`` (which yields an empty
sequence of the same type as *s*). Note also that the copies are shallow;
nested structures are not copied. This often haunts new Python programmers;
- consider:
+ consider::
>>> lists = [[]] * 3
>>> lists
@@ -935,7 +913,7 @@ Notes:
What has happened is that ``[[]]`` is a one-element list containing an empty
list, so all three elements of ``[[]] * 3`` are (pointers to) this single empty
list. Modifying any of the elements of ``lists`` modifies this single list.
- You can create a list of different lists this way:
+ You can create a list of different lists this way::
>>> lists = [[] for i in range(3)]
>>> lists[0].append(3)
@@ -966,33 +944,476 @@ Notes:
If *k* is ``None``, it is treated like ``1``.
(6)
- Concatenating immutable strings always results in a new object. This means
- that building up a string by repeated concatenation will have a quadratic
- runtime cost in the total string length. To get a linear runtime cost,
- you must switch to one of the alternatives below:
+ Concatenating immutable sequences always results in a new object. This
+ means that building up a sequence by repeated concatenation will have a
+ quadratic runtime cost in the total sequence length. To get a linear
+ runtime cost, you must switch to one of the alternatives below:
* if concatenating :class:`str` objects, you can build a list and use
- :meth:`str.join` at the end;
+ :meth:`str.join` at the end or else write to a :class:`io.StringIO`
+ instance and retrieve its value when complete
* if concatenating :class:`bytes` objects, you can similarly use
- :meth:`bytes.join`, or you can do in-place concatenation with a
- :class:`bytearray` object. :class:`bytearray` objects are mutable and
- have an efficient overallocation mechanism.
+ :meth:`bytes.join` or :class:`io.BytesIO`, or you can do in-place
+ concatenation with a :class:`bytearray` object. :class:`bytearray`
+ objects are mutable and have an efficient overallocation mechanism
+
+ * if concatenating :class:`tuple` objects, extend a :class:`list` instead
+
+ * for other types, investigate the relevant class documentation
+
+
+(7)
+ Some sequence types (such as :class:`range`) only support item sequences
+ that follow specific patterns, and hence don't support sequence
+ concatenation or repetition.
+
+(8)
+ ``index`` raises :exc:`ValueError` when *x* is not found in *s*.
+ When supported, the additional arguments to the index method allow
+ efficient searching of subsections of the sequence. Passing the extra
+ arguments is roughly equivalent to using ``s[i:j].index(x)``, only
+ without copying any data and with the returned index being relative to
+ the start of the sequence rather than the start of the slice.
+
+
+.. _typesseq-immutable:
+
+Immutable Sequence Types
+------------------------
+
+.. index::
+ triple: immutable; sequence; types
+ object: tuple
+ builtin: hash
+
+The only operation that immutable sequence types generally implement that is
+not also implemented by mutable sequence types is support for the :func:`hash`
+built-in.
+
+This support allows immutable sequences, such as :class:`tuple` instances, to
+be used as :class:`dict` keys and stored in :class:`set` and :class:`frozenset`
+instances.
+
+Attempting to hash an immutable sequence that contains unhashable values will
+result in :exc:`TypeError`.
+
+
+.. _typesseq-mutable:
+
+Mutable Sequence Types
+----------------------
+
+.. index::
+ triple: mutable; sequence; types
+ object: list
+ object: bytearray
+
+The operations in the following table are defined on mutable sequence types.
+The :class:`collections.abc.MutableSequence` ABC is provided to make it
+easier to correctly implement these operations on custom sequence types.
+
+In the table *s* is an instance of a mutable sequence type, *t* is any
+iterable object and *x* is an arbitrary object that meets any type
+and value restrictions imposed by *s* (for example, :class:`bytearray` only
+accepts integers that meet the value restriction ``0 <= x <= 255``).
+
+
+.. index::
+ triple: operations on; sequence; types
+ triple: operations on; list; type
+ pair: subscript; assignment
+ pair: slice; assignment
+ statement: del
+ single: append() (sequence method)
+ single: clear() (sequence method)
+ single: copy() (sequence method)
+ single: extend() (sequence method)
+ single: insert() (sequence method)
+ single: pop() (sequence method)
+ single: remove() (sequence method)
+ single: reverse() (sequence method)
+
++------------------------------+--------------------------------+---------------------+
+| Operation | Result | Notes |
++==============================+================================+=====================+
+| ``s[i] = x`` | item *i* of *s* is replaced by | |
+| | *x* | |
++------------------------------+--------------------------------+---------------------+
+| ``s[i:j] = t`` | slice of *s* from *i* to *j* | |
+| | is replaced by the contents of | |
+| | the iterable *t* | |
++------------------------------+--------------------------------+---------------------+
+| ``del s[i:j]`` | same as ``s[i:j] = []`` | |
++------------------------------+--------------------------------+---------------------+
+| ``s[i:j:k] = t`` | the elements of ``s[i:j:k]`` | \(1) |
+| | are replaced by those of *t* | |
++------------------------------+--------------------------------+---------------------+
+| ``del s[i:j:k]`` | removes the elements of | |
+| | ``s[i:j:k]`` from the list | |
++------------------------------+--------------------------------+---------------------+
+| ``s.append(x)`` | appends *x* to the end of the | |
+| | sequence (same as | |
+| | ``s[len(s):len(s)] = [x]``) | |
++------------------------------+--------------------------------+---------------------+
+| ``s.clear()`` | removes all items from ``s`` | \(5) |
+| | (same as ``del s[:]``) | |
++------------------------------+--------------------------------+---------------------+
+| ``s.copy()`` | creates a shallow copy of ``s``| \(5) |
+| | (same as ``s[:]``) | |
++------------------------------+--------------------------------+---------------------+
+| ``s.extend(t)`` | extends *s* with the | |
+| | contents of *t* (same as | |
+| | ``s[len(s):len(s)] = t``) | |
++------------------------------+--------------------------------+---------------------+
+| ``s.insert(i, x)`` | inserts *x* into *s* at the | |
+| | index given by *i* | |
+| | (same as ``s[i:i] = [x]``) | |
++------------------------------+--------------------------------+---------------------+
+| ``s.pop([i])`` | retrieves the item at *i* and | \(2) |
+| | also removes it from *s* | |
++------------------------------+--------------------------------+---------------------+
+| ``s.remove(x)`` | removes the first item from *s*| \(3) |
+| | where ``s[i] == x`` | |
++------------------------------+--------------------------------+---------------------+
+| ``s.reverse()`` | reverses the items of *s* in | \(4) |
+| | place | |
++------------------------------+--------------------------------+---------------------+
+
+
+Notes:
+
+(1)
+ *t* must have the same length as the slice it is replacing.
+
+(2)
+ The optional argument *i* defaults to ``-1``, so that by default the last
+ item is removed and returned.
+
+(3)
+ ``remove`` raises :exc:`ValueError` when *x* is not found in *s*.
+
+(4)
+ The :meth:`reverse` method modifies the sequence in place for economy of
+ space when reversing a large sequence. To remind users that it operates by
+ side effect, it does not return the reversed sequence.
+
+(5)
+ :meth:`clear` and :meth:`!copy` are included for consistency with the
+ interfaces of mutable containers that don't support slicing operations
+ (such as :class:`dict` and :class:`set`).
+
+ .. versionadded:: 3.3
+ :meth:`clear` and :meth:`!copy` methods.
+
+
+.. _typesseq-list:
+
+Lists
+-----
+
+.. index:: object: list
+
+Lists are mutable sequences, typically used to store collections of
+homogeneous items (where the precise degree of similarity will vary by
+application).
+
+.. class:: list([iterable])
+
+ Lists may be constructed in several ways:
+
+ * Using a pair of square brackets to denote the empty list: ``[]``
+ * Using square brackets, separating items with commas: ``[a]``, ``[a, b, c]``
+ * Using a list comprehension: ``[x for x in iterable]``
+ * Using the type constructor: ``list()`` or ``list(iterable)``
+
+ The constructor builds a list whose items are the same and in the same
+ order as *iterable*'s items. *iterable* may be either a sequence, a
+ container that supports iteration, or an iterator object. If *iterable*
+ is already a list, a copy is made and returned, similar to ``iterable[:]``.
+ For example, ``list('abc')`` returns ``['a', 'b', 'c']`` and
+ ``list( (1, 2, 3) )`` returns ``[1, 2, 3]``.
+ If no argument is given, the constructor creates a new empty list, ``[]``.
+
+
+ Many other operations also produce lists, including the :func:`sorted`
+ built-in.
+
+ Lists implement all of the :ref:`common <typesseq-common>` and
+ :ref:`mutable <typesseq-mutable>` sequence operations. Lists also provide the
+ following additional method:
+
+ .. method:: list.sort(*, key=None, reverse=False)
+
+ This method sorts the list in place, using only ``<`` comparisons
+ between items. Exceptions are not suppressed - if any comparison operations
+ fail, the entire sort operation will fail (and the list will likely be left
+ in a partially modified state).
+
+ *key* specifies a function of one argument that is used to extract a
+ comparison key from each list element (for example, ``key=str.lower``).
+ The key corresponding to each item in the list is calculated once and
+ then used for the entire sorting process. The default value of ``None``
+ means that list items are sorted directly without calculating a separate
+ key value.
+
+ The :func:`functools.cmp_to_key` utility is available to convert a 2.x
+ style *cmp* function to a *key* function.
+
+ *reverse* is a boolean value. If set to ``True``, then the list elements
+ are sorted as if each comparison were reversed.
+
+ This method modifies the sequence in place for economy of space when
+ sorting a large sequence. To remind users that it operates by side
+ effect, it does not return the sorted sequence (use :func:`sorted` to
+ explicitly request a new sorted list instance).
+
+ The :meth:`sort` method is guaranteed to be stable. A sort is stable if it
+ guarantees not to change the relative order of elements that compare equal
+ --- this is helpful for sorting in multiple passes (for example, sort by
+ department, then by salary grade).
+
+ .. impl-detail::
+
+ While a list is being sorted, the effect of attempting to mutate, or even
+ inspect, the list is undefined. The C implementation of Python makes the
+ list appear empty for the duration, and raises :exc:`ValueError` if it can
+ detect that the list has been mutated during a sort.
+
+
+.. _typesseq-tuple:
+
+Tuples
+------
+
+.. index:: object: tuple
+
+Tuples are immutable sequences, typically used to store collections of
+heterogeneous data (such as the 2-tuples produced by the :func:`enumerate`
+built-in). Tuples are also used for cases where an immutable sequence of
+homogeneous data is needed (such as allowing storage in a :class:`set` or
+:class:`dict` instance).
+
+.. class:: tuple([iterable])
+
+ Tuples may be constructed in a number of ways:
+
+ * Using a pair of parentheses to denote the empty tuple: ``()``
+ * Using a trailing comma for a singleton tuple: ``a,`` or ``(a,)``
+ * Separating items with commas: ``a, b, c`` or ``(a, b, c)``
+ * Using the :func:`tuple` built-in: ``tuple()`` or ``tuple(iterable)``
+
+ The constructor builds a tuple whose items are the same and in the same
+ order as *iterable*'s items. *iterable* may be either a sequence, a
+ container that supports iteration, or an iterator object. If *iterable*
+ is already a tuple, it is returned unchanged. For example,
+ ``tuple('abc')`` returns ``('a', 'b', 'c')`` and
+ ``tuple( [1, 2, 3] )`` returns ``(1, 2, 3)``.
+ If no argument is given, the constructor creates a new empty tuple, ``()``.
+
+ Note that it is actually the comma which makes a tuple, not the parentheses.
+ The parentheses are optional, except in the empty tuple case, or
+ when they are needed to avoid syntactic ambiguity. For example,
+ ``f(a, b, c)`` is a function call with three arguments, while
+ ``f((a, b, c))`` is a function call with a 3-tuple as the sole argument.
+
+ Tuples implement all of the :ref:`common <typesseq-common>` sequence
+ operations.
+
+For heterogeneous collections of data where access by name is clearer than
+access by index, :func:`collections.namedtuple` may be a more appropriate
+choice than a simple tuple object.
+.. _typesseq-range:
+
+Ranges
+------
+
+.. index:: object: range
+
+The :class:`range` type represents an immutable sequence of numbers and is
+commonly used for looping a specific number of times in :keyword:`for`
+loops.
+
+.. class:: range(stop)
+ range(start, stop[, step])
+
+ The arguments to the range constructor must be integers (either built-in
+ :class:`int` or any object that implements the ``__index__`` special
+ method). If the *step* argument is omitted, it defaults to ``1``.
+ If the *start* argument is omitted, it defaults to ``0``.
+ If *step* is zero, :exc:`ValueError` is raised.
+
+ For a positive *step*, the contents of a range ``r`` are determined by the
+ formula ``r[i] = start + step*i`` where ``i >= 0`` and
+ ``r[i] < stop``.
+
+ For a negative *step*, the contents of the range are still determined by
+ the formula ``r[i] = start + step*i``, but the constraints are ``i >= 0``
+ and ``r[i] > stop``.
+
+ A range object will be empty if ``r[0]`` does not meet the value
+ constraint. Ranges do support negative indices, but these are interpreted
+ as indexing from the end of the sequence determined by the positive
+ indices.
+
+ Ranges containing absolute values larger than :data:`sys.maxsize` are
+ permitted but some features (such as :func:`len`) may raise
+ :exc:`OverflowError`.
+
+ Range examples::
+
+ >>> list(range(10))
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+ >>> list(range(1, 11))
+ [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+ >>> list(range(0, 30, 5))
+ [0, 5, 10, 15, 20, 25]
+ >>> list(range(0, 10, 3))
+ [0, 3, 6, 9]
+ >>> list(range(0, -10, -1))
+ [0, -1, -2, -3, -4, -5, -6, -7, -8, -9]
+ >>> list(range(0))
+ []
+ >>> list(range(1, 0))
+ []
+
+ Ranges implement all of the :ref:`common <typesseq-common>` sequence operations
+ except concatenation and repetition (due to the fact that range objects can
+ only represent sequences that follow a strict pattern and repetition and
+ concatenation will usually violate that pattern).
+
+ .. attribute:: start
+
+ The value of the *start* parameter (or ``0`` if the parameter was
+ not supplied)
+
+ .. attribute:: stop
+
+ The value of the *stop* parameter
+
+ .. attribute:: step
+
+ The value of the *step* parameter (or ``1`` if the parameter was
+ not supplied)
+
+The advantage of the :class:`range` type over a regular :class:`list` or
+:class:`tuple` is that a :class:`range` object will always take the same
+(small) amount of memory, no matter the size of the range it represents (as it
+only stores the ``start``, ``stop`` and ``step`` values, calculating individual
+items and subranges as needed).
+
+Range objects implement the :class:`collections.abc.Sequence` ABC, and provide
+features such as containment tests, element index lookup, slicing and
+support for negative indices (see :ref:`typesseq`):
+
+ >>> r = range(0, 20, 2)
+ >>> r
+ range(0, 20, 2)
+ >>> 11 in r
+ False
+ >>> 10 in r
+ True
+ >>> r.index(10)
+ 5
+ >>> r[5]
+ 10
+ >>> r[:5]
+ range(0, 10, 2)
+ >>> r[-1]
+ 18
+
+Testing range objects for equality with ``==`` and ``!=`` compares
+them as sequences. That is, two range objects are considered equal if
+they represent the same sequence of values. (Note that two range
+objects that compare equal might have different :attr:`start`,
+:attr:`stop` and :attr:`step` attributes, for example ``range(0) ==
+range(2, 1, 3)`` or ``range(0, 3, 2) == range(0, 4, 2)``.)
+
+.. versionchanged:: 3.2
+ Implement the Sequence ABC.
+ Support slicing and negative indices.
+ Test :class:`int` objects for membership in constant time instead of
+ iterating through all items.
+
+.. versionchanged:: 3.3
+ Define '==' and '!=' to compare range objects based on the
+ sequence of values they define (instead of comparing based on
+ object identity).
+
+.. versionadded:: 3.3
+ The :attr:`start`, :attr:`stop` and :attr:`step` attributes.
+
+
+.. _textseq:
+
+Text Sequence Type --- :class:`str`
+===================================
+
+.. index::
+ object: string
+ object: bytes
+ object: bytearray
+ object: io.StringIO
+
+
+Textual data in Python is handled with ``str`` objects, which are immutable
+:ref:`sequences <typesseq>` of Unicode code points. String literals are
+written in a variety of ways:
+
+* Single quotes: ``'allows embedded "double" quotes'``
+* Double quotes: ``"allows embedded 'single' quotes"``
+* Triple quoted: ``'''Three single quotes'''``, ``"""Three double quotes"""``
+
+Triple quoted strings may span multiple lines - all associated whitespace will
+be included in the string literal.
+
+String literals that are part of a single expression and have only whitespace
+between them will be implicitly converted to a single string literal. That
+is, ``("spam " "eggs") == "spam eggs"``.
+
+See :ref:`strings` for more about the various forms of string literal,
+including supported escape sequences, and the ``r`` ("raw") prefix that
+disables most escape sequence processing.
+
+Strings may also be created from other objects with the built-in
+function :func:`str`.
+
+Since there is no separate "character" type, indexing a string produces
+strings of length 1. That is, for a non-empty string *s*, ``s[0] == s[0:1]``.
+
+There is also no mutable string type, but :meth:`str.join` or
+:class:`io.StringIO` can be used to efficiently construct strings from
+multiple fragments.
+
+.. versionchanged:: 3.3
+ For backwards compatibility with the Python 2 series, the ``u`` prefix is
+ once again permitted on string literals. It has no effect on the meaning
+ of string literals and cannot be combined with the ``r`` prefix.
+
.. _string-methods:
String Methods
--------------
-.. index:: pair: string; methods
+.. index::
+ pair: string; methods
+ module: re
-String objects support the methods listed below.
+Strings implement all of the :ref:`common <typesseq-common>` sequence
+operations, along with the additional methods described below.
-In addition, Python's strings support the sequence type methods described in the
-:ref:`typesseq` section. To output formatted strings, see the
-:ref:`string-formatting` section. Also, see the :mod:`re` module for string
-functions based on regular expressions.
+Strings also support two styles of string formatting, one providing a large
+degree of flexibility and customization (see :meth:`str.format`,
+:ref:`formatstrings` and :ref:`string-formatting`) and the other based on C
+``printf`` style formatting that handles a narrower range of types and is
+slightly harder to use correctly, but is often faster for the cases it can
+handle (:ref:`old-string-formatting`).
+
+The :ref:`textservices` section of the standard library covers a number of
+other modules that provide various text related utilities (including regular
+expression support in the :mod:`re` module).
.. method:: str.capitalize()
@@ -1000,6 +1421,23 @@ functions based on regular expressions.
rest lowercased.
+.. method:: str.casefold()
+
+ Return a casefolded copy of the string. Casefolded strings may be used for
+ caseless matching.
+
+ Casefolding is similar to lowercasing but more aggressive because it is
+ intended to remove all case distinctions in a string. For example, the German
+ lowercase letter ``'ß'`` is equivalent to ``"ss"``. Since it is already
+ lowercase, :meth:`lower` would do nothing to ``'ß'``; :meth:`casefold`
+ converts it to ``"ss"``.
+
+ The casefolding algorithm is described in section 3.13 of the Unicode
+ Standard.
+
+ .. versionadded:: 3.3
+
+
.. method:: str.center(width[, fillchar])
Return centered in a string of length *width*. Padding is done using the
@@ -1207,6 +1645,9 @@ functions based on regular expressions.
Return a copy of the string with all the cased characters [4]_ converted to
lowercase.
+ The lowercasing algorithm used is described in section 3.13 of the Unicode
+ Standard.
+
.. method:: str.lstrip([chars])
@@ -1279,7 +1720,7 @@ functions based on regular expressions.
two empty strings, followed by the string itself.
-.. method:: str.rsplit([sep[, maxsplit]])
+.. method:: str.rsplit(sep=None, maxsplit=-1)
Return a list of the words in the string, using *sep* as the delimiter string.
If *maxsplit* is given, at most *maxsplit* splits are done, the *rightmost*
@@ -1301,7 +1742,7 @@ functions based on regular expressions.
'mississ'
-.. method:: str.split([sep[, maxsplit]])
+.. method:: str.split(sep=None, maxsplit=-1)
Return a list of the words in the string, using *sep* as the delimiter
string. If *maxsplit* is given, at most *maxsplit* splits are done (thus,
@@ -1370,7 +1811,8 @@ functions based on regular expressions.
.. method:: str.swapcase()
Return a copy of the string with uppercase characters converted to lowercase and
- vice versa.
+ vice versa. Note that it is not necessarily true that
+ ``s.swapcase().swapcase() == s``.
.. method:: str.title()
@@ -1421,7 +1863,11 @@ functions based on regular expressions.
Return a copy of the string with all the cased characters [4]_ converted to
uppercase. Note that ``s.upper().isupper()`` might be ``False`` if ``s``
contains uncased characters or if the Unicode category of the resulting
- character(s) is not "Lu" (Letter, uppercase), but e.g. "Lt" (Letter, titlecase).
+ character(s) is not "Lu" (Letter, uppercase), but e.g. "Lt" (Letter,
+ titlecase).
+
+ The uppercasing algorithm used is described in section 3.13 of the Unicode
+ Standard.
.. method:: str.zfill(width)
@@ -1434,8 +1880,8 @@ functions based on regular expressions.
.. _old-string-formatting:
-Old String Formatting Operations
---------------------------------
+``printf``-style String Formatting
+----------------------------------
.. index::
single: formatting, string (%)
@@ -1447,23 +1893,19 @@ Old String Formatting Operations
single: % formatting
single: % interpolation
-.. XXX is the note enough?
-
.. note::
- The formatting operations described here are modelled on C's printf()
- syntax. They only support formatting of certain builtin types. The
- use of a binary operator means that care may be needed in order to
- format tuples and dictionaries correctly. As the new
- :ref:`string-formatting` syntax is more flexible and handles tuples and
- dictionaries naturally, it is recommended for new code. However, there
- are no current plans to deprecate printf-style formatting.
+ The formatting operations described here exhibit a variety of quirks that
+ lead to a number of common errors (such as failing to display tuples and
+ dictionaries correctly). Using the newer :meth:`str.format` interface
+ helps avoid these errors, and also provides a generally more powerful,
+ flexible and extensible approach to formatting text.
String objects have one unique built-in operation: the ``%`` operator (modulo).
This is also known as the string *formatting* or *interpolation* operator.
Given ``format % values`` (where *format* is a string), ``%`` conversion
specifications in *format* are replaced with zero or more elements of *values*.
-The effect is similar to the using :c:func:`sprintf` in the C language.
+The effect is similar to using :c:func:`sprintf` in the C language.
If *format* requires a single argument, *values* may be a single non-tuple
object. [5]_ Otherwise, *values* must be a tuple with exactly the number of
@@ -1621,211 +2063,178 @@ that ``'\0'`` is the end of the string.
``%f`` conversions for numbers whose absolute value is over 1e50 are no
longer replaced by ``%g`` conversions.
-.. index::
- module: string
- module: re
-Additional string operations are defined in standard modules :mod:`string` and
-:mod:`re`.
+.. _binaryseq:
+Binary Sequence Types --- :class:`bytes`, :class:`bytearray`, :class:`memoryview`
+=================================================================================
-.. _typesseq-range:
+.. index::
+ object: bytes
+ object: bytearray
+ object: memoryview
+ module: array
-Range Type
-----------
+The core built-in types for manipulating binary data are :class:`bytes` and
+:class:`bytearray`. They are supported by :class:`memoryview` which uses
+the buffer protocol to access the memory of other binary objects without
+needing to make a copy.
-.. index:: object: range
+The :mod:`array` module supports efficient storage of basic data types like
+32-bit integers and IEEE754 double-precision floating values.
-The :class:`range` type is an immutable sequence which is commonly used for
-looping. The advantage of the :class:`range` type is that an :class:`range`
-object will always take the same amount of memory, no matter the size of the
-range it represents.
+.. _typebytes:
-Range objects have relatively little behavior: they support indexing, contains,
-iteration, the :func:`len` function, and the following methods:
+Bytes
+-----
-.. method:: range.count(x)
+.. index:: object: bytes
- Return the number of *i*'s for which ``s[i] == x``.
+Bytes objects are immutable sequences of single bytes. Since many major
+binary protocols are based on the ASCII text encoding, bytes objects offer
+several methods that are only valid when working with ASCII compatible
+data and are closely related to string objects in a variety of other ways.
- .. versionadded:: 3.2
+Firstly, the syntax for bytes literals is largely the same as that for string
+literals, except that a ``b`` prefix is added:
-.. method:: range.index(x)
+* Single quotes: ``b'still allows embedded "double" quotes'``
+* Double quotes: ``b"still allows embedded 'single' quotes"``
+* Triple quoted: ``b'''3 single quotes'''``, ``b"""3 double quotes"""``
- Return the smallest *i* such that ``s[i] == x``. Raises
- :exc:`ValueError` when *x* is not in the range.
+Only ASCII characters are permitted in bytes literals (regardless of the
+declared source code encoding). Any binary values over 127 must be entered
+into bytes literals using the appropriate escape sequence.
- .. versionadded:: 3.2
+As with string literals, bytes literals may also use a ``r`` prefix to disable
+processing of escape sequences. See :ref:`strings` for more about the various
+forms of bytes literal, including supported escape sequences.
-.. _typesseq-mutable:
+While bytes literals and representations are based on ASCII text, bytes
+objects actually behave like immutable sequences of integers, with each
+value in the sequence restricted such that ``0 <= x < 256`` (attempts to
+violate this restriction will trigger :exc:`ValueError`). This is done
+deliberately to emphasise that while many binary formats include ASCII based
+elements and can be usefully manipulated with some text-oriented algorithms,
+this is not generally the case for arbitrary binary data (blindly applying
+text processing algorithms to binary data formats that are not ASCII
+compatible will usually lead to data corruption).
-Mutable Sequence Types
-----------------------
+In addition to the literal forms, bytes objects can be created in a number of
+other ways:
-.. index::
- triple: mutable; sequence; types
- object: list
- object: bytearray
+* A zero-filled bytes object of a specified length: ``bytes(10)``
+* From an iterable of integers: ``bytes(range(20))``
+* Copying existing binary data via the buffer protocol: ``bytes(obj)``
-List and bytearray objects support additional operations that allow in-place
-modification of the object. Other mutable sequence types (when added to the
-language) should also support these operations. Strings and tuples are
-immutable sequence types: such objects cannot be modified once created. The
-following operations are defined on mutable sequence types (where *x* is an
-arbitrary object).
+Also see the :ref:`bytes <func-bytes>` built-in.
-Note that while lists allow their items to be of any type, bytearray object
-"items" are all integers in the range 0 <= x < 256.
+Since bytes objects are sequences of integers, for a bytes object *b*,
+``b[0]`` will be an integer, while ``b[0:1]`` will be a bytes object of
+length 1. (This contrasts with text strings, where both indexing and
+slicing will produce a string of length 1.)
-.. index::
- triple: operations on; sequence; types
- triple: operations on; list; type
- pair: subscript; assignment
- pair: slice; assignment
- statement: del
- single: append() (sequence method)
- single: extend() (sequence method)
- single: count() (sequence method)
- single: index() (sequence method)
- single: insert() (sequence method)
- single: pop() (sequence method)
- single: remove() (sequence method)
- single: reverse() (sequence method)
- single: sort() (sequence method)
+The representation of bytes objects uses the literal format (``b'...'``)
+since it is often more useful than e.g. ``bytes([46, 46, 46])``. You can
+always convert a bytes object into a list of integers using ``list(b)``.
-+------------------------------+--------------------------------+---------------------+
-| Operation | Result | Notes |
-+==============================+================================+=====================+
-| ``s[i] = x`` | item *i* of *s* is replaced by | |
-| | *x* | |
-+------------------------------+--------------------------------+---------------------+
-| ``s[i:j] = t`` | slice of *s* from *i* to *j* | |
-| | is replaced by the contents of | |
-| | the iterable *t* | |
-+------------------------------+--------------------------------+---------------------+
-| ``del s[i:j]`` | same as ``s[i:j] = []`` | |
-+------------------------------+--------------------------------+---------------------+
-| ``s[i:j:k] = t`` | the elements of ``s[i:j:k]`` | \(1) |
-| | are replaced by those of *t* | |
-+------------------------------+--------------------------------+---------------------+
-| ``del s[i:j:k]`` | removes the elements of | |
-| | ``s[i:j:k]`` from the list | |
-+------------------------------+--------------------------------+---------------------+
-| ``s.append(x)`` | same as ``s[len(s):len(s)] = | |
-| | [x]`` | |
-+------------------------------+--------------------------------+---------------------+
-| ``s.extend(x)`` | same as ``s[len(s):len(s)] = | \(2) |
-| | x`` | |
-+------------------------------+--------------------------------+---------------------+
-| ``s.count(x)`` | return number of *i*'s for | |
-| | which ``s[i] == x`` | |
-+------------------------------+--------------------------------+---------------------+
-| ``s.index(x[, i[, j]])`` | return smallest *k* such that | \(3) |
-| | ``s[k] == x`` and ``i <= k < | |
-| | j`` | |
-+------------------------------+--------------------------------+---------------------+
-| ``s.insert(i, x)`` | same as ``s[i:i] = [x]`` | \(4) |
-+------------------------------+--------------------------------+---------------------+
-| ``s.pop([i])`` | same as ``x = s[i]; del s[i]; | \(5) |
-| | return x`` | |
-+------------------------------+--------------------------------+---------------------+
-| ``s.remove(x)`` | same as ``del s[s.index(x)]`` | \(3) |
-+------------------------------+--------------------------------+---------------------+
-| ``s.reverse()`` | reverses the items of *s* in | \(6) |
-| | place | |
-+------------------------------+--------------------------------+---------------------+
-| ``s.sort([key[, reverse]])`` | sort the items of *s* in place | (6), (7), (8) |
-+------------------------------+--------------------------------+---------------------+
-
-
-Notes:
-
-(1)
- *t* must have the same length as the slice it is replacing.
-(2)
- *x* can be any iterable object.
-
-(3)
- Raises :exc:`ValueError` when *x* is not found in *s*. When a negative index is
- passed as the second or third parameter to the :meth:`index` method, the sequence
- length is added, as for slice indices. If it is still negative, it is truncated
- to zero, as for slice indices.
-
-(4)
- When a negative index is passed as the first parameter to the :meth:`insert`
- method, the sequence length is added, as for slice indices. If it is still
- negative, it is truncated to zero, as for slice indices.
-
-(5)
- The optional argument *i* defaults to ``-1``, so that by default the last
- item is removed and returned.
+.. note::
+ For Python 2.x users: In the Python 2.x series, a variety of implicit
+ conversions between 8-bit strings (the closest thing 2.x offers to a
+ built-in binary data type) and Unicode strings were permitted. This was a
+ backwards compatibility workaround to account for the fact that Python
+ originally only supported 8-bit text, and Unicode text was a later
+ addition. In Python 3.x, those implicit conversions are gone - conversions
+ between 8-bit binary data and Unicode text must be explicit, and bytes and
+ string objects will always compare unequal.
-(6)
- The :meth:`sort` and :meth:`reverse` methods modify the sequence in place for
- economy of space when sorting or reversing a large sequence. To remind you
- that they operate by side effect, they don't return the sorted or reversed
- sequence.
-(7)
- The :meth:`sort` method takes optional arguments for controlling the
- comparisons. Each must be specified as a keyword argument.
+.. _typebytearray:
- *key* specifies a function of one argument that is used to extract a comparison
- key from each list element: ``key=str.lower``. The default value is ``None``.
- Use :func:`functools.cmp_to_key` to convert an
- old-style *cmp* function to a *key* function.
+Bytearray Objects
+-----------------
+.. index:: object: bytearray
- *reverse* is a boolean value. If set to ``True``, then the list elements are
- sorted as if each comparison were reversed.
+:class:`bytearray` objects are a mutable counterpart to :class:`bytes`
+objects. There is no dedicated literal syntax for bytearray objects; instead
+they are always created by calling the constructor:
- The :meth:`sort` method is guaranteed to be stable. A
- sort is stable if it guarantees not to change the relative order of elements
- that compare equal --- this is helpful for sorting in multiple passes (for
- example, sort by department, then by salary grade).
+* Creating an empty instance: ``bytearray()``
+* Creating a zero-filled instance with a given length: ``bytearray(10)``
+* From an iterable of integers: ``bytearray(range(20))``
+* Copying existing binary data via the buffer protocol: ``bytearray(b'Hi!')``
- .. impl-detail::
+As bytearray objects are mutable, they support the
+:ref:`mutable <typesseq-mutable>` sequence operations in addition to the
+common bytes and bytearray operations described in :ref:`bytes-methods`.
- While a list is being sorted, the effect of attempting to mutate, or even
- inspect, the list is undefined. The C implementation of Python makes the
- list appear empty for the duration, and raises :exc:`ValueError` if it can
- detect that the list has been mutated during a sort.
-
-(8)
- :meth:`sort` is not supported by :class:`bytearray` objects.
+Also see the :ref:`bytearray <func-bytearray>` built-in.
.. _bytes-methods:
-Bytes and Byte Array Methods
-----------------------------
+Bytes and Bytearray Operations
+------------------------------
.. index:: pair: bytes; methods
pair: bytearray; methods
-Bytes and bytearray objects, being "strings of bytes", have all methods found on
-strings, with the exception of :func:`encode`, :func:`format` and
-:func:`isidentifier`, which do not make sense with these types. For converting
-the objects to strings, they have a :func:`decode` method.
+Both bytes and bytearray objects support the :ref:`common <typesseq-common>`
+sequence operations. They interoperate not just with operands of the same
+type, but with any object that supports the
+:ref:`buffer protocol <bufferobjects>`. Due to this flexibility, they can be
+freely mixed in operations without causing errors. However, the return type
+of the result may depend on the order of operands.
+
+Due to the common use of ASCII text as the basis for binary protocols, bytes
+and bytearray objects provide almost all methods found on text strings, with
+the exceptions of:
-Wherever one of these methods needs to interpret the bytes as characters
-(e.g. the :func:`is...` methods), the ASCII character set is assumed.
+* :meth:`str.encode` (which converts text strings to bytes objects)
+* :meth:`str.format` and :meth:`str.format_map` (which are used to format
+ text for display to users)
+* :meth:`str.isidentifier`, :meth:`str.isnumeric`, :meth:`str.isdecimal`,
+ :meth:`str.isprintable` (which are used to check various properties of
+ text strings which are not typically applicable to binary protocols).
+
+All other string methods are supported, although sometimes with slight
+differences in functionality and semantics (as described below).
.. note::
The methods on bytes and bytearray objects don't accept strings as their
arguments, just as the methods on strings don't accept bytes as their
- arguments. For example, you have to write ::
+ arguments. For example, you have to write::
a = "abc"
b = a.replace("a", "f")
- and ::
+ and::
a = b"abc"
b = a.replace(b"a", b"f")
+Whenever a bytes or bytearray method needs to interpret the bytes as
+characters (e.g. the :meth:`is...` methods, :meth:`split`, :meth:`strip`),
+the ASCII character set is assumed (text strings use Unicode semantics).
+
+.. note::
+ Using these ASCII based methods to manipulate binary data that is not
+ stored in an ASCII based format may lead to data corruption.
+
+The search operations (:keyword:`in`, :meth:`count`, :meth:`find`,
+:meth:`index`, :meth:`rfind` and :meth:`rindex`) all accept both integers
+in the range 0 to 255 (inclusive) and bytes and bytearray sequences.
+
+.. versionchanged:: 3.3
+ All of the search methods also accept an integer in the range 0 to 255
+ (inclusive) as their first argument.
+
+
+Each bytes and bytearray instance provides a :meth:`decode` convenience
+method that is the inverse of :meth:`str.encode`:
.. method:: bytes.decode(encoding="utf-8", errors="strict")
bytearray.decode(encoding="utf-8", errors="strict")
@@ -1841,8 +2250,10 @@ Wherever one of these methods needs to interpret the bytes as characters
.. versionchanged:: 3.1
Added support for keyword arguments.
-
-The bytes and bytearray types have an additional class method:
+Since 2 hexadecimal digits correspond precisely to a single byte, hexadecimal
+numbers are a commonly used format for describing binary data. Accordingly,
+the bytes and bytearray types have an additional class method to read data in
+that format:
.. classmethod:: bytes.fromhex(string)
bytearray.fromhex(string)
@@ -1851,8 +2262,8 @@ The bytes and bytearray types have an additional class method:
decoding the given string object. The string must contain two hexadecimal
digits per byte, spaces are ignored.
- >>> bytes.fromhex('f0 f1f2 ')
- b'\xf0\xf1\xf2'
+ >>> bytes.fromhex('2Ef0 F1f2 ')
+ b'.\xf0\xf1\xf2'
The maketrans and translate methods differ in semantics from the versions
@@ -1886,6 +2297,434 @@ available on strings:
.. versionadded:: 3.1
+.. _typememoryview:
+
+Memory Views
+------------
+
+:class:`memoryview` objects allow Python code to access the internal data
+of an object that supports the :ref:`buffer protocol <bufferobjects>` without
+copying.
+
+.. class:: memoryview(obj)
+
+ Create a :class:`memoryview` that references *obj*. *obj* must support the
+ buffer protocol. Built-in objects that support the buffer protocol include
+ :class:`bytes` and :class:`bytearray`.
+
+ A :class:`memoryview` has the notion of an *element*, which is the
+ atomic memory unit handled by the originating object *obj*. For many
+ simple types such as :class:`bytes` and :class:`bytearray`, an element
+ is a single byte, but other types such as :class:`array.array` may have
+ bigger elements.
+
+ ``len(view)`` is equal to the length of the list returned by
+ :meth:`~memoryview.tolist`.
+ If ``view.ndim == 0``, the length is 1. If ``view.ndim == 1``, the length
+ is equal to the number of elements in the view. For higher dimensions,
+ the length is equal to the length of the nested list representation of
+ the view. The :attr:`~memoryview.itemsize` attribute will give you the
+ number of bytes in a single element.
+
+ A :class:`memoryview` supports slicing to expose its data. If
+ :attr:`~memoryview.format` is one of the native format specifiers
+ from the :mod:`struct` module, indexing will return a single element
+ with the correct type. Full slicing will result in a subview::
+
+ >>> v = memoryview(b'abcefg')
+ >>> v[1]
+ 98
+ >>> v[-1]
+ 103
+ >>> v[1:4]
+ <memory at 0x7f3ddc9f4350>
+ >>> bytes(v[1:4])
+ b'bce'
+
+ Other native formats::
+
+ >>> import array
+ >>> a = array.array('l', [-11111111, 22222222, -33333333, 44444444])
+ >>> m = memoryview(a)
+ >>> m[0]
+ -11111111
+ >>> m[-1]
+ 44444444
+ >>> m[2:3].tolist()
+ [-33333333]
+ >>> m[::2].tolist()
+ [-11111111, -33333333]
+ >>> m[::-1].tolist()
+ [44444444, -33333333, 22222222, -11111111]
+
+ .. versionadded:: 3.3
+
+ If the underlying object is writable, the memoryview supports slice
+ assignment. Resizing is not allowed::
+
+ >>> data = bytearray(b'abcefg')
+ >>> v = memoryview(data)
+ >>> v.readonly
+ False
+ >>> v[0] = ord(b'z')
+ >>> data
+ bytearray(b'zbcefg')
+ >>> v[1:4] = b'123'
+ >>> data
+ bytearray(b'z123fg')
+ >>> v[2:3] = b'spam'
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ ValueError: memoryview assignment: lvalue and rvalue have different structures
+ >>> v[2:6] = b'spam'
+ >>> data
+ bytearray(b'z1spam')
+
+ One-dimensional memoryviews of hashable (read-only) types with formats
+ 'B', 'b' or 'c' are also hashable. The hash is defined as
+ ``hash(m) == hash(m.tobytes())``::
+
+ >>> v = memoryview(b'abcefg')
+ >>> hash(v) == hash(b'abcefg')
+ True
+ >>> hash(v[2:4]) == hash(b'ce')
+ True
+ >>> hash(v[::-2]) == hash(b'abcefg'[::-2])
+ True
+
+ .. versionchanged:: 3.3
+ One-dimensional memoryviews with formats 'B', 'b' or 'c' are now hashable.
+
+ .. note::
+ Hashing of memoryviews with formats other than 'B', 'b' or 'c' as well
+ as hashing of multi-dimensional memoryviews is possible in version 3.3.0,
+ but will raise an error in 3.3.1 in order to be compatible with the new
+ memoryview equality definition.
+
+ :class:`memoryview` has several methods:
+
+ .. method:: __eq__(exporter)
+
+ A memoryview and a :pep:`3118` exporter are equal if their shapes are
+ equivalent and if all corresponding values are equal when the operands'
+ respective format codes are interpreted using :mod:`struct` syntax.
+
+ For the subset of :mod:`struct` format strings currently supported by
+ :meth:`tolist`, ``v`` and ``w`` are equal if ``v.tolist() == w.tolist()``::
+
+ >>> import array
+ >>> a = array.array('I', [1, 2, 3, 4, 5])
+ >>> b = array.array('d', [1.0, 2.0, 3.0, 4.0, 5.0])
+ >>> c = array.array('b', [5, 3, 1])
+ >>> x = memoryview(a)
+ >>> y = memoryview(b)
+ >>> x == a == y == b
+ True
+ >>> x.tolist() == a.tolist() == y.tolist() == b.tolist()
+ True
+ >>> z = y[::-2]
+ >>> z == c
+ True
+ >>> z.tolist() == c.tolist()
+ True
+
+ If either format string is not supported by the :mod:`struct` module,
+ then the objects will always compare as unequal (even if the format
+ strings and buffer contents are identical)::
+
+ >>> from ctypes import BigEndianStructure, c_long
+ >>> class BEPoint(BigEndianStructure):
+ ... _fields_ = [("x", c_long), ("y", c_long)]
+ ...
+ >>> point = BEPoint(100, 200)
+ >>> a = memoryview(point)
+ >>> b = memoryview(point)
+ >>> a == point
+ False
+ >>> a == b
+ False
+
+ Note that, as with floating point numbers, ``v is w`` does *not* imply
+ ``v == w`` for memoryview objects.
+
+ .. versionchanged:: 3.3
+ Previous versions compared the raw memory disregarding the item format
+ and the logical array structure.
+
+ .. method:: tobytes()
+
+ Return the data in the buffer as a bytestring. This is equivalent to
+ calling the :class:`bytes` constructor on the memoryview. ::
+
+ >>> m = memoryview(b"abc")
+ >>> m.tobytes()
+ b'abc'
+ >>> bytes(m)
+ b'abc'
+
+ For non-contiguous arrays the result is equal to the flattened list
+ representation with all elements converted to bytes. :meth:`tobytes`
+ supports all format strings, including those that are not in
+ :mod:`struct` module syntax.
+
+ .. method:: tolist()
+
+ Return the data in the buffer as a list of elements. ::
+
+ >>> memoryview(b'abc').tolist()
+ [97, 98, 99]
+ >>> import array
+ >>> a = array.array('d', [1.1, 2.2, 3.3])
+ >>> m = memoryview(a)
+ >>> m.tolist()
+ [1.1, 2.2, 3.3]
+
+ .. versionchanged:: 3.3
+ :meth:`tolist` now supports all single character native formats in
+ :mod:`struct` module syntax as well as multi-dimensional
+ representations.
+
+ .. method:: release()
+
+ Release the underlying buffer exposed by the memoryview object. Many
+ objects take special actions when a view is held on them (for example,
+ a :class:`bytearray` would temporarily forbid resizing); therefore,
+ calling release() is handy to remove these restrictions (and free any
+ dangling resources) as soon as possible.
+
+ After this method has been called, any further operation on the view
+ raises a :class:`ValueError` (except :meth:`release()` itself which can
+ be called multiple times)::
+
+ >>> m = memoryview(b'abc')
+ >>> m.release()
+ >>> m[0]
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ ValueError: operation forbidden on released memoryview object
+
+ The context management protocol can be used for a similar effect,
+ using the ``with`` statement::
+
+ >>> with memoryview(b'abc') as m:
+ ... m[0]
+ ...
+ 97
+ >>> m[0]
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ ValueError: operation forbidden on released memoryview object
+
+ .. versionadded:: 3.2
+
+ .. method:: cast(format[, shape])
+
+ Cast a memoryview to a new format or shape. *shape* defaults to
+ ``[byte_length//new_itemsize]``, which means that the result view
+ will be one-dimensional. The return value is a new memoryview, but
+ the buffer itself is not copied. Supported casts are 1D -> C-contiguous
+ and C-contiguous -> 1D.
+
+ Both formats are restricted to single element native formats in
+ :mod:`struct` syntax. One of the formats must be a byte format
+ ('B', 'b' or 'c'). The byte length of the result must be the same
+ as the original length.
+
+ Cast 1D/long to 1D/unsigned bytes::
+
+ >>> import array
+ >>> a = array.array('l', [1,2,3])
+ >>> x = memoryview(a)
+ >>> x.format
+ 'l'
+ >>> x.itemsize
+ 8
+ >>> len(x)
+ 3
+ >>> x.nbytes
+ 24
+ >>> y = x.cast('B')
+ >>> y.format
+ 'B'
+ >>> y.itemsize
+ 1
+ >>> len(y)
+ 24
+ >>> y.nbytes
+ 24
+
+ Cast 1D/unsigned bytes to 1D/char::
+
+ >>> b = bytearray(b'zyz')
+ >>> x = memoryview(b)
+ >>> x[0] = b'a'
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ ValueError: memoryview: invalid value for format "B"
+ >>> y = x.cast('c')
+ >>> y[0] = b'a'
+ >>> b
+ bytearray(b'ayz')
+
+ Cast 1D/bytes to 3D/ints to 1D/signed char::
+
+ >>> import struct
+ >>> buf = struct.pack("i"*12, *list(range(12)))
+ >>> x = memoryview(buf)
+ >>> y = x.cast('i', shape=[2,2,3])
+ >>> y.tolist()
+ [[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]]
+ >>> y.format
+ 'i'
+ >>> y.itemsize
+ 4
+ >>> len(y)
+ 2
+ >>> y.nbytes
+ 48
+ >>> z = y.cast('b')
+ >>> z.format
+ 'b'
+ >>> z.itemsize
+ 1
+ >>> len(z)
+ 48
+ >>> z.nbytes
+ 48
+
+ Cast 1D/unsigned char to 2D/unsigned long::
+
+ >>> buf = struct.pack("L"*6, *list(range(6)))
+ >>> x = memoryview(buf)
+ >>> y = x.cast('L', shape=[2,3])
+ >>> len(y)
+ 2
+ >>> y.nbytes
+ 48
+ >>> y.tolist()
+ [[0, 1, 2], [3, 4, 5]]
+
+ .. versionadded:: 3.3
+
+ There are also several readonly attributes available:
+
+ .. attribute:: obj
+
+ The underlying object of the memoryview::
+
+ >>> b = bytearray(b'xyz')
+ >>> m = memoryview(b)
+ >>> m.obj is b
+ True
+
+ .. versionadded:: 3.3
+
+ .. attribute:: nbytes
+
+ ``nbytes == product(shape) * itemsize == len(m.tobytes())``. This is
+ the amount of space in bytes that the array would use in a contiguous
+ representation. It is not necessarily equal to len(m)::
+
+ >>> import array
+ >>> a = array.array('i', [1,2,3,4,5])
+ >>> m = memoryview(a)
+ >>> len(m)
+ 5
+ >>> m.nbytes
+ 20
+ >>> y = m[::2]
+ >>> len(y)
+ 3
+ >>> y.nbytes
+ 12
+ >>> len(y.tobytes())
+ 12
+
+ Multi-dimensional arrays::
+
+ >>> import struct
+ >>> buf = struct.pack("d"*12, *[1.5*x for x in range(12)])
+ >>> x = memoryview(buf)
+ >>> y = x.cast('d', shape=[3,4])
+ >>> y.tolist()
+ [[0.0, 1.5, 3.0, 4.5], [6.0, 7.5, 9.0, 10.5], [12.0, 13.5, 15.0, 16.5]]
+ >>> len(y)
+ 3
+ >>> y.nbytes
+ 96
+
+ .. versionadded:: 3.3
+
+ .. attribute:: readonly
+
+ A bool indicating whether the memory is read only.
+
+ .. attribute:: format
+
+ A string containing the format (in :mod:`struct` module style) for each
+ element in the view. A memoryview can be created from exporters with
+ arbitrary format strings, but some methods (e.g. :meth:`tolist`) are
+ restricted to native single element formats.
+
+ .. versionchanged:: 3.3
+ format ``'B'`` is now handled according to the struct module syntax.
+ This means that ``memoryview(b'abc')[0] == b'abc'[0] == 97``.
+
+ .. attribute:: itemsize
+
+ The size in bytes of each element of the memoryview::
+
+ >>> import array, struct
+ >>> m = memoryview(array.array('H', [32000, 32001, 32002]))
+ >>> m.itemsize
+ 2
+ >>> m[0]
+ 32000
+ >>> struct.calcsize('H') == m.itemsize
+ True
+
+ .. attribute:: ndim
+
+ An integer indicating how many dimensions of a multi-dimensional array the
+ memory represents.
+
+ .. attribute:: shape
+
+ A tuple of integers the length of :attr:`ndim` giving the shape of the
+ memory as an N-dimensional array.
+
+ .. versionchanged:: 3.3
+ An empty tuple instead of None when ndim = 0.
+
+ .. attribute:: strides
+
+ A tuple of integers the length of :attr:`ndim` giving the size in bytes to
+ access each element for each dimension of the array.
+
+ .. versionchanged:: 3.3
+ An empty tuple instead of None when ndim = 0.
+
+ .. attribute:: suboffsets
+
+ Used internally for PIL-style arrays. The value is informational only.
+
+ .. attribute:: c_contiguous
+
+ A bool indicating whether the memory is C-contiguous.
+
+ .. versionadded:: 3.3
+
+ .. attribute:: f_contiguous
+
+ A bool indicating whether the memory is Fortran contiguous.
+
+ .. versionadded:: 3.3
+
+ .. attribute:: contiguous
+
+ A bool indicating whether the memory is contiguous.
+
+ .. versionadded:: 3.3
+
+
.. _types-set:
Set Types --- :class:`set`, :class:`frozenset`
@@ -1897,7 +2736,7 @@ A :dfn:`set` object is an unordered collection of distinct :term:`hashable` obje
Common uses include membership testing, removing duplicates from a sequence, and
computing mathematical operations such as intersection, union, difference, and
symmetric difference.
-(For other containers see the built in :class:`dict`, :class:`list`,
+(For other containers see the built-in :class:`dict`, :class:`list`,
and :class:`tuple` classes, and the :mod:`collections` module.)
Like other collections, sets support ``x in set``, ``len(set)``, and ``for x in
@@ -2097,7 +2936,7 @@ Mapping Types --- :class:`dict`
A :dfn:`mapping` object maps :term:`hashable` values to arbitrary objects.
Mappings are mutable objects. There is currently only one standard mapping
-type, the :dfn:`dictionary`. (For other containers see the built in
+type, the :dfn:`dictionary`. (For other containers see the built-in
:class:`list`, :class:`set`, and :class:`tuple` classes, and the
:mod:`collections` module.)
@@ -2229,13 +3068,13 @@ pairs within braces, for example: ``{'jack': 4098, 'sjoerd': 4127}`` or ``{4098:
.. method:: items()
- Return a new view of the dictionary's items (``(key, value)`` pairs). See
- below for documentation of view objects.
+ Return a new view of the dictionary's items (``(key, value)`` pairs).
+ See the :ref:`documentation of view objects <dict-views>`.
.. method:: keys()
- Return a new view of the dictionary's keys. See below for documentation of
- view objects.
+ Return a new view of the dictionary's keys. See the :ref:`documentation
+ of view objects <dict-views>`.
.. method:: pop(key[, default])
@@ -2269,8 +3108,12 @@ pairs within braces, for example: ``{'jack': 4098, 'sjoerd': 4127}`` or ``{4098:
.. method:: values()
- Return a new view of the dictionary's values. See below for documentation of
- view objects.
+ Return a new view of the dictionary's values. See the
+ :ref:`documentation of view objects <dict-views>`.
+
+.. seealso::
+ :class:`types.MappingProxyType` can be used to create a read-only view
+ of a :class:`dict`.
.. _dict-views:
@@ -2316,7 +3159,7 @@ Keys views are set-like since their entries are unique and hashable. If all
values are hashable, so that ``(key, value)`` pairs are unique and hashable,
then the items view is also set-like. (Values views are not treated as set-like
since the entries are generally not unique.) For set-like views, all of the
-operations defined for the abstract base class :class:`collections.Set` are
+operations defined for the abstract base class :class:`collections.abc.Set` are
available (for example, ``==``, ``<``, or ``^``).
An example of dictionary view usage::
@@ -2351,159 +3194,6 @@ An example of dictionary view usage::
{'juice', 'sausage', 'bacon', 'spam'}
-.. _typememoryview:
-
-memoryview type
-===============
-
-:class:`memoryview` objects allow Python code to access the internal data
-of an object that supports the :ref:`buffer protocol <bufferobjects>` without
-copying. Memory is generally interpreted as simple bytes.
-
-.. class:: memoryview(obj)
-
- Create a :class:`memoryview` that references *obj*. *obj* must support the
- buffer protocol. Built-in objects that support the buffer protocol include
- :class:`bytes` and :class:`bytearray`.
-
- A :class:`memoryview` has the notion of an *element*, which is the
- atomic memory unit handled by the originating object *obj*. For many
- simple types such as :class:`bytes` and :class:`bytearray`, an element
- is a single byte, but other types such as :class:`array.array` may have
- bigger elements.
-
- ``len(view)`` returns the total number of elements in the memoryview,
- *view*. The :class:`~memoryview.itemsize` attribute will give you the
- number of bytes in a single element.
-
- A :class:`memoryview` supports slicing to expose its data. Taking a single
- index will return a single element as a :class:`bytes` object. Full
- slicing will result in a subview::
-
- >>> v = memoryview(b'abcefg')
- >>> v[1]
- b'b'
- >>> v[-1]
- b'g'
- >>> v[1:4]
- <memory at 0x77ab28>
- >>> bytes(v[1:4])
- b'bce'
-
- If the object the memoryview is over supports changing its data, the
- memoryview supports slice assignment::
-
- >>> data = bytearray(b'abcefg')
- >>> v = memoryview(data)
- >>> v.readonly
- False
- >>> v[0] = b'z'
- >>> data
- bytearray(b'zbcefg')
- >>> v[1:4] = b'123'
- >>> data
- bytearray(b'z123fg')
- >>> v[2] = b'spam'
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- ValueError: cannot modify size of memoryview object
-
- Notice how the size of the memoryview object cannot be changed.
-
- :class:`memoryview` has several methods:
-
- .. method:: tobytes()
-
- Return the data in the buffer as a bytestring. This is equivalent to
- calling the :class:`bytes` constructor on the memoryview. ::
-
- >>> m = memoryview(b"abc")
- >>> m.tobytes()
- b'abc'
- >>> bytes(m)
- b'abc'
-
- .. method:: tolist()
-
- Return the data in the buffer as a list of integers. ::
-
- >>> memoryview(b'abc').tolist()
- [97, 98, 99]
-
- .. method:: release()
-
- Release the underlying buffer exposed by the memoryview object. Many
- objects take special actions when a view is held on them (for example,
- a :class:`bytearray` would temporarily forbid resizing); therefore,
- calling release() is handy to remove these restrictions (and free any
- dangling resources) as soon as possible.
-
- After this method has been called, any further operation on the view
- raises a :class:`ValueError` (except :meth:`release()` itself which can
- be called multiple times)::
-
- >>> m = memoryview(b'abc')
- >>> m.release()
- >>> m[0]
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- ValueError: operation forbidden on released memoryview object
-
- The context management protocol can be used for a similar effect,
- using the ``with`` statement::
-
- >>> with memoryview(b'abc') as m:
- ... m[0]
- ...
- b'a'
- >>> m[0]
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- ValueError: operation forbidden on released memoryview object
-
- .. versionadded:: 3.2
-
- There are also several readonly attributes available:
-
- .. attribute:: format
-
- A string containing the format (in :mod:`struct` module style) for each
- element in the view. This defaults to ``'B'``, a simple bytestring.
-
- .. attribute:: itemsize
-
- The size in bytes of each element of the memoryview::
-
- >>> m = memoryview(array.array('H', [1,2,3]))
- >>> m.itemsize
- 2
- >>> m[0]
- b'\x01\x00'
- >>> len(m[0]) == m.itemsize
- True
-
- .. attribute:: shape
-
- A tuple of integers the length of :attr:`ndim` giving the shape of the
- memory as a N-dimensional array.
-
- .. attribute:: ndim
-
- An integer indicating how many dimensions of a multi-dimensional array the
- memory represents.
-
- .. attribute:: strides
-
- A tuple of integers the length of :attr:`ndim` giving the size in bytes to
- access each element for each dimension of the array.
-
- .. attribute:: readonly
-
- A bool indicating whether the memory is read only.
-
- .. memoryview.suboffsets isn't documented because it only seems useful for C
-
-
.. _typecontextmanager:
Context Manager Types
@@ -2725,7 +3415,7 @@ The Null Object
This object is returned by functions that don't explicitly return a value. It
supports no special operations. There is exactly one null object, named
-``None`` (a built-in name).
+``None`` (a built-in name). ``type(None)()`` produces the same singleton.
It is written as ``None``.
@@ -2737,7 +3427,8 @@ The Ellipsis Object
This object is commonly used by slicing (see :ref:`slicings`). It supports no
special operations. There is exactly one ellipsis object, named
-:const:`Ellipsis` (a built-in name).
+:const:`Ellipsis` (a built-in name). ``type(Ellipsis)()`` produces the
+:const:`Ellipsis` singleton.
It is written as ``Ellipsis`` or ``...``.
@@ -2749,7 +3440,8 @@ The NotImplemented Object
This object is returned from comparisons and binary operations when they are
asked to operate on types they don't support. See :ref:`comparisons` for more
-information.
+information. There is exactly one ``NotImplemented`` object.
+``type(NotImplemented)()`` produces the singleton instance.
It is written as ``NotImplemented``.
@@ -2815,6 +3507,13 @@ types, where they are relevant. Some of these are not reported by the
The name of the class or type.
+.. attribute:: class.__qualname__
+
+ The :term:`qualified name` of the class or type.
+
+ .. versionadded:: 3.3
+
+
.. attribute:: class.__mro__
This attribute is a tuple of classes that are considered when looking for
diff --git a/Doc/library/string.rst b/Doc/library/string.rst
index 79d4e3f..9c63272 100644
--- a/Doc/library/string.rst
+++ b/Doc/library/string.rst
@@ -10,7 +10,7 @@
.. seealso::
- :ref:`typesseq`
+ :ref:`textseq`
:ref:`string-methods`
diff --git a/Doc/library/strings.rst b/Doc/library/strings.rst
deleted file mode 100644
index 08f1658..0000000
--- a/Doc/library/strings.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-.. _stringservices:
-
-***************
-String Services
-***************
-
-The modules described in this chapter provide a wide range of string
-manipulation operations.
-
-In addition, Python's built-in string classes support the sequence type methods
-described in the :ref:`typesseq` section, and also the string-specific methods
-described in the :ref:`string-methods` section. To output formatted strings,
-see the :ref:`string-formatting` section. Also, see the :mod:`re` module for
-string functions based on regular expressions.
-
-
-.. toctree::
-
- string.rst
- re.rst
- struct.rst
- difflib.rst
- textwrap.rst
- codecs.rst
- unicodedata.rst
- stringprep.rst
-
diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst
index 12820e0..994506c 100644
--- a/Doc/library/struct.rst
+++ b/Doc/library/struct.rst
@@ -187,17 +187,24 @@ platform-dependent.
| ``Q`` | :c:type:`unsigned long | integer | 8 | \(2), \(3) |
| | long` | | | |
+--------+--------------------------+--------------------+----------------+------------+
-| ``f`` | :c:type:`float` | float | 4 | \(4) |
+| ``n`` | :c:type:`ssize_t` | integer | | \(4) |
+--------+--------------------------+--------------------+----------------+------------+
-| ``d`` | :c:type:`double` | float | 8 | \(4) |
+| ``N`` | :c:type:`size_t` | integer | | \(4) |
++--------+--------------------------+--------------------+----------------+------------+
+| ``f`` | :c:type:`float` | float | 4 | \(5) |
++--------+--------------------------+--------------------+----------------+------------+
+| ``d`` | :c:type:`double` | float | 8 | \(5) |
+--------+--------------------------+--------------------+----------------+------------+
| ``s`` | :c:type:`char[]` | bytes | | |
+--------+--------------------------+--------------------+----------------+------------+
| ``p`` | :c:type:`char[]` | bytes | | |
+--------+--------------------------+--------------------+----------------+------------+
-| ``P`` | :c:type:`void \*` | integer | | \(5) |
+| ``P`` | :c:type:`void \*` | integer | | \(6) |
+--------+--------------------------+--------------------+----------------+------------+
+.. versionchanged:: 3.3
+ Added support for the ``'n'`` and ``'N'`` formats.
+
Notes:
(1)
@@ -219,11 +226,17 @@ Notes:
Use of the :meth:`__index__` method for non-integers is new in 3.2.
(4)
+ The ``'n'`` and ``'N'`` conversion codes are only available for the native
+ size (selected as the default or with the ``'@'`` byte order character).
+ For the standard size, you can use whichever of the other integer formats
+ fits your application.
+
+(5)
For the ``'f'`` and ``'d'`` conversion codes, the packed representation uses
the IEEE 754 binary32 (for ``'f'``) or binary64 (for ``'d'``) format,
regardless of the floating-point format used by the platform.
-(5)
+(6)
The ``'P'`` format character is only available for the native byte ordering
(selected as the default or with the ``'@'`` byte order character). The byte
order character ``'='`` chooses to use little- or big-endian ordering based
diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst
index e260a1f..66d6cbb 100644
--- a/Doc/library/subprocess.rst
+++ b/Doc/library/subprocess.rst
@@ -30,16 +30,21 @@ convenience functions for all use cases they can handle. For more advanced
use cases, the underlying :class:`Popen` interface can be used directly.
-.. function:: call(args, *, stdin=None, stdout=None, stderr=None, shell=False)
+.. function:: call(args, *, stdin=None, stdout=None, stderr=None, shell=False, timeout=None)
Run the command described by *args*. Wait for command to complete, then
return the :attr:`returncode` attribute.
The arguments shown above are merely the most common ones, described below
- in :ref:`frequently-used-arguments` (hence the slightly odd notation in
- the abbreviated signature). The full function signature is the same as
- that of the :class:`Popen` constructor - this functions passes all
- supplied arguments directly through to that interface.
+ in :ref:`frequently-used-arguments` (hence the use of keyword-only notation
+ in the abbreviated signature). The full function signature is largely the
+ same as that of the :class:`Popen` constructor - this function passes all
+ supplied arguments other than *timeout* directly through to that interface.
+
+ The *timeout* argument is passed to :meth:`Popen.wait`. If the timeout
+ expires, the child process will be killed and then waited for again. The
+ :exc:`TimeoutExpired` exception will be re-raised after the child process
+ has terminated.
Examples::
@@ -62,8 +67,11 @@ use cases, the underlying :class:`Popen` interface can be used directly.
process may block if it generates enough output to a pipe to fill up
the OS pipe buffer.
+ .. versionchanged:: 3.3
+ *timeout* was added.
+
-.. function:: check_call(args, *, stdin=None, stdout=None, stderr=None, shell=False)
+.. function:: check_call(args, *, stdin=None, stdout=None, stderr=None, shell=False, timeout=None)
Run command with arguments. Wait for command to complete. If the return
code was zero then return, otherwise raise :exc:`CalledProcessError`. The
@@ -71,10 +79,15 @@ use cases, the underlying :class:`Popen` interface can be used directly.
:attr:`returncode` attribute.
The arguments shown above are merely the most common ones, described below
- in :ref:`frequently-used-arguments` (hence the slightly odd notation in
- the abbreviated signature). The full function signature is the same as
- that of the :class:`Popen` constructor - this functions passes all
- supplied arguments directly through to that interface.
+ in :ref:`frequently-used-arguments` (hence the use of keyword-only notation
+ in the abbreviated signature). The full function signature is largely the
+ same as that of the :class:`Popen` constructor - this function passes all
+ supplied arguments other than *timeout* directly through to that interface.
+
+ The *timeout* argument is passed to :meth:`Popen.wait`. If the timeout
+ expires, the child process will be killed and then waited for again. The
+ :exc:`TimeoutExpired` exception will be re-raised after the child process
+ has terminated.
Examples::
@@ -86,8 +99,6 @@ use cases, the underlying :class:`Popen` interface can be used directly.
...
subprocess.CalledProcessError: Command 'exit 1' returned non-zero exit status 1
- .. versionadded:: 2.5
-
.. warning::
Invoking the system shell with ``shell=True`` can be a security hazard
@@ -101,8 +112,11 @@ use cases, the underlying :class:`Popen` interface can be used directly.
process may block if it generates enough output to a pipe to fill up
the OS pipe buffer.
+ .. versionchanged:: 3.3
+ *timeout* was added.
+
-.. function:: check_output(args, *, stdin=None, stderr=None, shell=False, universal_newlines=False)
+.. function:: check_output(args, *, stdin=None, stderr=None, shell=False, universal_newlines=False, timeout=None)
Run command with arguments and return its output as a byte string.
@@ -112,11 +126,17 @@ use cases, the underlying :class:`Popen` interface can be used directly.
attribute.
The arguments shown above are merely the most common ones, described below
- in :ref:`frequently-used-arguments` (hence the slightly odd notation in
- the abbreviated signature). The full function signature is largely the
- same as that of the :class:`Popen` constructor, except that *stdout* is
- not permitted as it is used internally. All other supplied arguments are
- passed directly through to the :class:`Popen` constructor.
+ in :ref:`frequently-used-arguments` (hence the use of keyword-only notation
+ in the abbreviated signature). The full function signature is largely the
+ same as that of the :class:`Popen` constructor - this function passes all
+ supplied arguments other than *timeout* directly through to that interface.
+ In addition, *stdout* is not permitted as an argument, as it is used
+ internally to collect the output from the subprocess.
+
+ The *timeout* argument is passed to :meth:`Popen.wait`. If the timeout
+ expires, the child process will be killed and then waited for again. The
+ :exc:`TimeoutExpired` exception will be re-raised after the child process
+ has terminated.
Examples::
@@ -147,7 +167,7 @@ use cases, the underlying :class:`Popen` interface can be used directly.
... shell=True)
'ls: non_existent_file: No such file or directory\n'
- .. versionadded:: 2.7
+ .. versionadded:: 3.1
.. warning::
@@ -161,6 +181,18 @@ use cases, the underlying :class:`Popen` interface can be used directly.
read in the current process, the child process may block if it
generates enough output to the pipe to fill up the OS pipe buffer.
+ .. versionchanged:: 3.3
+ *timeout* was added.
+
+
+.. data:: DEVNULL
+
+ Special value that can be used as the *stdin*, *stdout* or *stderr* argument
+ to :class:`Popen` and indicates that the special file :data:`os.devnull`
+ will be used.
+
+ .. versionadded:: 3.3
+
.. data:: PIPE
@@ -176,10 +208,38 @@ use cases, the underlying :class:`Popen` interface can be used directly.
output.
+.. exception:: SubprocessError
+
+ Base class for all other exceptions from this module.
+
+ .. versionadded:: 3.3
+
+
+.. exception:: TimeoutExpired
+
+ Subclass of :exc:`SubprocessError`, raised when a timeout expires
+ while waiting for a child process.
+
+ .. attribute:: cmd
+
+ Command that was used to spawn the child process.
+
+ .. attribute:: timeout
+
+ Timeout in seconds.
+
+ .. attribute:: output
+
+ Output of the child process if this exception is raised by
+ :func:`check_output`. Otherwise, ``None``.
+
+ .. versionadded:: 3.3
+
+
.. exception:: CalledProcessError
- Exception raised when a process run by :func:`check_call` or
- :func:`check_output` returns a non-zero exit status.
+ Subclass of :exc:`SubprocessError`, raised when a process run by
+ :func:`check_call` or :func:`check_output` returns a non-zero exit status.
.. attribute:: returncode
@@ -216,25 +276,27 @@ default values. The arguments that are most commonly needed are:
*stdin*, *stdout* and *stderr* specify the executed program's standard input,
standard output and standard error file handles, respectively. Valid values
- are :data:`PIPE`, an existing file descriptor (a positive integer), an
- existing file object, and ``None``. :data:`PIPE` indicates that a new pipe
- to the child should be created. With the default settings of ``None``, no
- redirection will occur; the child's file handles will be inherited from the
- parent. Additionally, *stderr* can be :data:`STDOUT`, which indicates that
- the stderr data from the child process should be captured into the same file
- handle as for stdout.
+ are :data:`PIPE`, :data:`DEVNULL`, an existing file descriptor (a positive
+ integer), an existing file object, and ``None``. :data:`PIPE` indicates
+ that a new pipe to the child should be created. :data:`DEVNULL` indicates
+ that the special file :data:`os.devnull` will be used. With the default
+ settings of ``None``, no redirection will occur; the child's file handles
+ will be inherited from the parent. Additionally, *stderr* can be
+ :data:`STDOUT`, which indicates that the stderr data from the child
+ process should be captured into the same file handle as for *stdout*.
.. index::
single: universal newlines; subprocess module
- If *universal_newlines* is ``True``, the file objects *stdin*, *stdout*
- and *stderr* will be opened as text streams in :term:`universal newlines`
- mode using the encoding returned by :func:`locale.getpreferredencoding`.
- For *stdin*, line ending characters ``'\n'`` in the input will be converted
- to the default line separator :data:`os.linesep`. For *stdout* and
- *stderr*, all line endings in the output will be converted to ``'\n'``.
- For more information see the documentation of the :class:`io.TextIOWrapper`
- class when the *newline* argument to its constructor is ``None``.
+ If *universal_newlines* is ``True``, the file objects *stdin*, *stdout* and
+ *stderr* will be opened as text streams in :term:`universal newlines` mode
+ using the encoding returned by :func:`locale.getpreferredencoding(False)
+ <locale.getpreferredencoding>`. For *stdin*, line ending characters
+ ``'\n'`` in the input will be converted to the default line separator
+ :data:`os.linesep`. For *stdout* and *stderr*, all line endings in the
+ output will be converted to ``'\n'``. For more information see the
+ documentation of the :class:`io.TextIOWrapper` class when the *newline*
+ argument to its constructor is ``None``.
.. note::
@@ -254,6 +316,12 @@ default values. The arguments that are most commonly needed are:
:mod:`fnmatch`, :func:`os.walk`, :func:`os.path.expandvars`,
:func:`os.path.expanduser`, and :mod:`shutil`).
+ .. versionchanged:: 3.3
+ When *universal_newlines* is ``True``, the class uses the encoding
+ :func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`
+ instead of ``locale.getpreferredencoding()``. See the
+ :class:`io.TextIOWrapper` class for more information on this change.
+
.. warning::
Executing shell commands that incorporate unsanitized input from an
@@ -382,13 +450,14 @@ functions.
*stdin*, *stdout* and *stderr* specify the executed program's standard input,
standard output and standard error file handles, respectively. Valid values
- are :data:`PIPE`, an existing file descriptor (a positive integer), an
- existing :term:`file object`, and ``None``. :data:`PIPE` indicates that a
- new pipe to the child should be created. With the default settings of
- ``None``, no redirection will occur; the child's file handles will be
- inherited from the parent. Additionally, *stderr* can be :data:`STDOUT`,
- which indicates that the stderr data from the applications should be
- captured into the same file handle as for stdout.
+ are :data:`PIPE`, :data:`DEVNULL`, an existing file descriptor (a positive
+ integer), an existing :term:`file object`, and ``None``. :data:`PIPE`
+ indicates that a new pipe to the child should be created. :data:`DEVNULL`
+ indicates that the special file :data:`os.devnull` will be used. With the
+ default settings of ``None``, no redirection will occur; the child's file
+ handles will be inherited from the parent. Additionally, *stderr* can be
+ :data:`STDOUT`, which indicates that the stderr data from the applications
+ should be captured into the same file handle as for stdout.
If *preexec_fn* is set to a callable object, this object will be called in the
child process just before the child is executed.
@@ -498,6 +567,15 @@ arguments.
:exc:`CalledProcessError` if the called process returns a non-zero return
code.
+All of the functions and methods that accept a *timeout* parameter, such as
+:func:`call` and :meth:`Popen.communicate`, will raise :exc:`TimeoutExpired` if
+the timeout expires before the process exits.
+
+Exceptions defined in this module all inherit from :exc:`SubprocessError`.
+
+ .. versionadded:: 3.3
+ The :exc:`SubprocessError` base class was added.
+
Security
^^^^^^^^
@@ -521,11 +599,15 @@ Instances of the :class:`Popen` class have the following methods:
attribute.
-.. method:: Popen.wait()
+.. method:: Popen.wait(timeout=None)
Wait for child process to terminate. Set and return :attr:`returncode`
attribute.
+ If the process does not terminate after *timeout* seconds, raise a
+ :exc:`TimeoutExpired` exception. It is safe to catch this exception and
+ retry the wait.
+
.. warning::
This will deadlock when using ``stdout=PIPE`` and/or
@@ -533,13 +615,17 @@ Instances of the :class:`Popen` class have the following methods:
a pipe such that it blocks waiting for the OS pipe buffer to
accept more data. Use :meth:`communicate` to avoid that.
+ .. versionchanged:: 3.3
+ *timeout* was added.
+
-.. method:: Popen.communicate(input=None)
+.. method:: Popen.communicate(input=None, timeout=None)
Interact with process: Send data to stdin. Read data from stdout and stderr,
- until end-of-file is reached. Wait for process to terminate. The optional
- *input* argument should be a byte string to be sent to the child process, or
- ``None``, if no data should be sent to the child.
+ until end-of-file is reached. Wait for process to terminate. The optional
+ *input* argument should be data to be sent to the child process, or
+ ``None``, if no data should be sent to the child. The type of *input*
+ must be bytes or, if *universal_newlines* was ``True``, a string.
:meth:`communicate` returns a tuple ``(stdoutdata, stderrdata)``.
@@ -548,11 +634,29 @@ Instances of the :class:`Popen` class have the following methods:
``None`` in the result tuple, you need to give ``stdout=PIPE`` and/or
``stderr=PIPE`` too.
+ If the process does not terminate after *timeout* seconds, a
+ :exc:`TimeoutExpired` exception will be raised. Catching this exception and
+ retrying communication will not lose any output.
+
+ The child process is not killed if the timeout expires, so in order to
+ clean up properly a well-behaved application should kill the child process and
+ finish communication::
+
+ proc = subprocess.Popen(...)
+ try:
+ outs, errs = proc.communicate(timeout=15)
+ except TimeoutExpired:
+ proc.kill()
+ outs, errs = proc.communicate()
+
.. note::
The data read is buffered in memory, so do not use this method if the data
size is large or unlimited.
+ .. versionchanged:: 3.3
+ *timeout* was added.
+
.. method:: Popen.send_signal(signal)
@@ -994,3 +1098,9 @@ runtime):
backslash. If the number of backslashes is odd, the last
backslash escapes the next double quotation mark as
described in rule 3.
+
+
+.. seealso::
+
+ :mod:`shlex`
+ Module which provides functions to parse and escape command lines.
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
index 0e4adec..cd6d4bf 100644
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -29,6 +29,33 @@ always available.
command line, see the :mod:`fileinput` module.
+.. data:: base_exec_prefix
+
+ Set during Python startup, before ``site.py`` is run, to the same value as
+ :data:`exec_prefix`. If not running in a
+ :ref:`virtual environment <venv-def>`, the values will stay the same; if
+ ``site.py`` finds that a virtual environment is in use, the values of
+ :data:`prefix` and :data:`exec_prefix` will be changed to point to the
+ virtual environment, whereas :data:`base_prefix` and
+ :data:`base_exec_prefix` will remain pointing to the base Python
+ installation (the one which the virtual environment was created from).
+
+ .. versionadded:: 3.3
+
+
+.. data:: base_prefix
+
+ Set during Python startup, before ``site.py`` is run, to the same value as
+ :data:`prefix`. If not running in a :ref:`virtual environment <venv-def>`, the values
+ will stay the same; if ``site.py`` finds that a virtual environment is in
+ use, the values of :data:`prefix` and :data:`exec_prefix` will be changed to
+ point to the virtual environment, whereas :data:`base_prefix` and
+ :data:`base_exec_prefix` will remain pointing to the base Python
+ installation (the one which the virtual environment was created from).
+
+ .. versionadded:: 3.3
+
+
.. data:: byteorder
An indicator of the native byte order. This will have the value ``'big'`` on
@@ -80,6 +107,22 @@ always available.
This function should be used for internal and specialized purposes only.
+.. function:: _debugmallocstats()
+
+ Print low-level information to stderr about the state of CPython's memory
+ allocator.
+
+ If Python is configured with ``--with-pydebug``, it also performs some expensive
+ internal consistency checks.
+
+ .. versionadded:: 3.3
+
+ .. impl-detail::
+
+ This function is specific to CPython. The exact output format is not
+ defined here, and may change.
+
+
.. data:: dllhandle
Integer specifying the handle of the Python DLL. Availability: Windows.
@@ -199,6 +242,11 @@ always available.
installed in :file:`{exec_prefix}/lib/python{X.Y}/lib-dynload`, where *X.Y*
is the version number of Python, for example ``3.2``.
+ .. note:: If a :ref:`virtual environment <venv-def>` is in effect, this
+ value will be changed in ``site.py`` to point to the virtual environment.
+ The value for the Python installation will still be available, via
+ :data:`base_exec_prefix`.
+
.. data:: executable
@@ -235,14 +283,13 @@ always available.
.. data:: flags
- The struct sequence *flags* exposes the status of command line flags. The
- attributes are read only.
+ The :term:`struct sequence` *flags* exposes the status of command line
+ flags. The attributes are read only.
============================= =============================
attribute flag
============================= =============================
:const:`debug` :option:`-d`
- :const:`division_warning` :option:`-Q`
:const:`inspect` :option:`-i`
:const:`interactive` :option:`-i`
:const:`optimize` :option:`-O` or :option:`-OO`
@@ -262,15 +309,18 @@ always available.
.. versionadded:: 3.2.3
The ``hash_randomization`` attribute.
+ .. versionchanged:: 3.3
+ Removed obsolete ``division_warning`` attribute.
+
.. data:: float_info
- A structseq holding information about the float type. It contains low level
- information about the precision and internal representation. The values
- correspond to the various floating-point constants defined in the standard
- header file :file:`float.h` for the 'C' programming language; see section
- 5.2.4.2.2 of the 1999 ISO/IEC C standard [C99]_, 'Characteristics of
- floating types', for details.
+ A :term:`struct sequence` holding information about the float type. It
+ contains low level information about the precision and internal
+ representation. The values correspond to the various floating-point
+ constants defined in the standard header file :file:`float.h` for the 'C'
+ programming language; see section 5.2.4.2.2 of the 1999 ISO/IEC C standard
+ [C99]_, 'Characteristics of floating types', for details.
+---------------------+----------------+--------------------------------------------------+
| attribute | float.h macro | explanation |
@@ -409,6 +459,9 @@ always available.
does not have to hold true for third-party extensions as it is implementation
specific.
+ Only the memory consumption directly attributed to the object is
+ accounted for, not the memory consumption of objects it refers to.
+
If given, *default* will be returned if the object does not provide means to
retrieve the size. Otherwise a :exc:`TypeError` will be raised.
@@ -520,8 +573,9 @@ always available.
.. data:: hash_info
- A structseq giving parameters of the numeric hash implementation. For
- more details about hashing of numeric types, see :ref:`numeric-hash`.
+ A :term:`struct sequence` giving parameters of the numeric hash
+ implementation. For more details about hashing of numeric types, see
+ :ref:`numeric-hash`.
+---------------------+--------------------------------------------------+
| attribute | explanation |
@@ -556,8 +610,8 @@ always available.
This is called ``hexversion`` since it only really looks meaningful when viewed
as the result of passing it to the built-in :func:`hex` function. The
- struct sequence :data:`sys.version_info` may be used for a more human-friendly
- encoding of the same information.
+ :term:`struct sequence` :data:`sys.version_info` may be used for a more
+ human-friendly encoding of the same information.
The ``hexversion`` is a 32-bit number with the following layout:
@@ -583,10 +637,51 @@ always available.
Thus ``2.1.0a3`` is hexversion ``0x020100a3``.
+
+.. data:: implementation
+
+ An object containing information about the implementation of the
+ currently running Python interpreter. The following attributes are
+ required to exist in all Python implementations.
+
+ *name* is the implementation's identifier, e.g. ``'cpython'``. The actual
+ string is defined by the Python implementation, but it is guaranteed to be
+ lower case.
+
+ *version* is a named tuple, in the same format as
+ :data:`sys.version_info`. It represents the version of the Python
+ *implementation*. This has a distinct meaning from the specific
+ version of the Python *language* to which the currently running
+ interpreter conforms, which ``sys.version_info`` represents. For
+ example, for PyPy 1.8 ``sys.implementation.version`` might be
+ ``sys.version_info(1, 8, 0, 'final', 0)``, whereas ``sys.version_info``
+ would be ``sys.version_info(2, 7, 2, 'final', 0)``. For CPython they
+ are the same value, since it is the reference implementation.
+
+ *hexversion* is the implementation version in hexadecimal format, like
+ :data:`sys.hexversion`.
+
+ *cache_tag* is the tag used by the import machinery in the filenames of
+ cached modules. By convention, it would be a composite of the
+ implementation's name and version, like ``'cpython-33'``. However, a
+ Python implementation may use some other value if appropriate. If
+ ``cache_tag`` is set to ``None``, it indicates that module caching should
+ be disabled.
+
+ :data:`sys.implementation` may contain additional attributes specific to
+ the Python implementation. These non-standard attributes must start with
+ an underscore, and are not described here. Regardless of its contents,
+ :data:`sys.implementation` will not change during a run of the interpreter,
+ nor between implementation versions. (It may change between Python
+ language versions, however.) See :pep:`421` for more information.
+
+ .. versionadded:: 3.3
+
+
.. data:: int_info
- A struct sequence that holds information about Python's
- internal representation of integers. The attributes are read only.
+ A :term:`struct sequence` that holds information about Python's internal
+ representation of integers. The attributes are read only.
+-------------------------+----------------------------------------------+
| Attribute | Explanation |
@@ -641,9 +736,13 @@ always available.
.. data:: maxunicode
- An integer giving the largest supported code point for a Unicode character. The
- value of this depends on the configuration option that specifies whether Unicode
- characters are stored as UCS-2 or UCS-4.
+ An integer giving the value of the largest Unicode code point,
+ i.e. ``1114111`` (``0x10FFFF`` in hexadecimal).
+
+ .. versionchanged:: 3.3
+ Before :pep:`393`, ``sys.maxunicode`` used to be either ``0xFFFF``
+ or ``0x10FFFF``, depending on the configuration option that specified
+ whether Unicode characters were stored as UCS-2 or UCS-4.
.. data:: meta_path
@@ -718,36 +817,35 @@ always available.
This string contains a platform identifier that can be used to append
platform-specific components to :data:`sys.path`, for instance.
- For most Unix systems, this is the lowercased OS name as returned by ``uname
- -s`` with the first part of the version as returned by ``uname -r`` appended,
- e.g. ``'sunos5'``, *at the time when Python was built*. Unless you want to
- test for a specific system version, it is therefore recommended to use the
- following idiom::
+ For Unix systems, except on Linux, this is the lowercased OS name as
+ returned by ``uname -s`` with the first part of the version as returned by
+ ``uname -r`` appended, e.g. ``'sunos5'`` or ``'freebsd8'``, *at the time
+ when Python was built*. Unless you want to test for a specific system
+ version, it is therefore recommended to use the following idiom::
if sys.platform.startswith('freebsd'):
# FreeBSD-specific code here...
elif sys.platform.startswith('linux'):
# Linux-specific code here...
- .. versionchanged:: 3.2.2
- Since lots of code check for ``sys.platform == 'linux2'``, and there is
- no essential change between Linux 2.x and 3.x, ``sys.platform`` is always
- set to ``'linux2'``, even on Linux 3.x. In Python 3.3 and later, the
- value will always be set to ``'linux'``, so it is recommended to always
- use the ``startswith`` idiom presented above.
-
For other systems, the values are:
- ====================== ===========================
- System :data:`platform` value
- ====================== ===========================
- Linux (2.x *and* 3.x) ``'linux2'``
- Windows ``'win32'``
- Windows/Cygwin ``'cygwin'``
- Mac OS X ``'darwin'``
- OS/2 ``'os2'``
- OS/2 EMX ``'os2emx'``
- ====================== ===========================
+ ================ ===========================
+ System :data:`platform` value
+ ================ ===========================
+ Linux ``'linux'``
+ Windows ``'win32'``
+ Windows/Cygwin ``'cygwin'``
+ Mac OS X ``'darwin'``
+ OS/2 ``'os2'``
+ OS/2 EMX ``'os2emx'``
+ ================ ===========================
+
+ .. versionchanged:: 3.3
+ On Linux, :attr:`sys.platform` doesn't contain the major version anymore.
+ It is always ``'linux'``, instead of ``'linux2'`` or ``'linux3'``. Since
+ older Python versions include the version number, it is recommended to
+ always use the ``startswith`` idiom presented above.
.. seealso::
@@ -764,11 +862,16 @@ always available.
independent Python files are installed; by default, this is the string
``'/usr/local'``. This can be set at build time with the ``--prefix``
argument to the :program:`configure` script. The main collection of Python
- library modules is installed in the directory :file:`{prefix}/lib/python{X.Y}``
+ library modules is installed in the directory :file:`{prefix}/lib/python{X.Y}`
while the platform independent header files (all except :file:`pyconfig.h`) are
stored in :file:`{prefix}/include/python{X.Y}`, where *X.Y* is the version
number of Python, for example ``3.2``.
+ .. note:: If a :ref:`virtual environment <venv-def>` is in effect, this
+ value will be changed in ``site.py`` to point to the virtual
+ environment. The value for the Python installation will still be
+ available, via :data:`base_prefix`.
+
.. data:: ps1
ps2
@@ -806,11 +909,11 @@ always available.
the interpreter loads extension modules. Among other things, this will enable a
lazy resolving of symbols when importing a module, if called as
``sys.setdlopenflags(0)``. To share symbols across extension modules, call as
- ``sys.setdlopenflags(ctypes.RTLD_GLOBAL)``. Symbolic names for the
- flag modules can be either found in the :mod:`ctypes` module, or in the :mod:`DLFCN`
- module. If :mod:`DLFCN` is not available, it can be generated from
- :file:`/usr/include/dlfcn.h` using the :program:`h2py` script. Availability:
- Unix.
+ ``sys.setdlopenflags(os.RTLD_GLOBAL)``. Symbolic names for the flag values
+ can be found in the :mod:`os` module (``RTLD_xxx`` constants, e.g.
+ :data:`os.RTLD_LAZY`).
+
+ Availability: Unix.
.. function:: setprofile(profilefunc)
@@ -1003,22 +1106,33 @@ always available.
to a console and Python apps started with :program:`pythonw`.
-.. data:: subversion
+.. data:: thread_info
- A triple (repo, branch, version) representing the Subversion information of the
- Python interpreter. *repo* is the name of the repository, ``'CPython'``.
- *branch* is a string of one of the forms ``'trunk'``, ``'branches/name'`` or
- ``'tags/name'``. *version* is the output of ``svnversion``, if the interpreter
- was built from a Subversion checkout; it contains the revision number (range)
- and possibly a trailing 'M' if there were local modifications. If the tree was
- exported (or svnversion was not available), it is the revision of
- ``Include/patchlevel.h`` if the branch is a tag. Otherwise, it is ``None``.
+ A :term:`struct sequence` holding information about the thread
+ implementation.
- .. deprecated:: 3.2.1
- Python is now `developed <http://docs.python.org/devguide/>`_ using
- Mercurial. In recent Python 3.2 bugfix releases, :data:`subversion`
- therefore contains placeholder information. It is removed in Python
- 3.3.
+ +------------------+---------------------------------------------------------+
+ | Attribute | Explanation |
+ +==================+=========================================================+
+ | :const:`name` | Name of the thread implementation: |
+ | | |
+ | | * ``'nt'``: Windows threads |
+ | | * ``'os2'``: OS/2 threads |
+ | | * ``'pthread'``: POSIX threads |
+ | | * ``'solaris'``: Solaris threads |
+ +------------------+---------------------------------------------------------+
+ | :const:`lock` | Name of the lock implementation: |
+ | | |
+ | | * ``'semaphore'``: a lock uses a semaphore |
+ | | * ``'mutex+cond'``: a lock uses a mutex |
+ | | and a condition variable |
+ | | * ``None`` if this information is unknown |
+ +------------------+---------------------------------------------------------+
+ | :const:`version` | Name and version of the thread library. It is a string, |
+ | | or ``None`` if this information is unknown. |
+ +------------------+---------------------------------------------------------+
+
+ .. versionadded:: 3.3
.. data:: tracebacklimit
diff --git a/Doc/library/syslog.rst b/Doc/library/syslog.rst
index 974ecf9..6e90dc0 100644
--- a/Doc/library/syslog.rst
+++ b/Doc/library/syslog.rst
@@ -79,12 +79,14 @@ Priority levels (high to low):
Facilities:
:const:`LOG_KERN`, :const:`LOG_USER`, :const:`LOG_MAIL`, :const:`LOG_DAEMON`,
:const:`LOG_AUTH`, :const:`LOG_LPR`, :const:`LOG_NEWS`, :const:`LOG_UUCP`,
- :const:`LOG_CRON`, :const:`LOG_SYSLOG` and :const:`LOG_LOCAL0` to
- :const:`LOG_LOCAL7`.
+ :const:`LOG_CRON`, :const:`LOG_SYSLOG`, :const:`LOG_LOCAL0` to
+ :const:`LOG_LOCAL7`, and, if defined in ``<syslog.h>``,
+ :const:`LOG_AUTHPRIV`.
Log options:
- :const:`LOG_PID`, :const:`LOG_CONS`, :const:`LOG_NDELAY`, :const:`LOG_NOWAIT`
- and :const:`LOG_PERROR` if defined in ``<syslog.h>``.
+ :const:`LOG_PID`, :const:`LOG_CONS`, :const:`LOG_NDELAY`, and, if defined
+ in ``<syslog.h>``, :const:`LOG_ODELAY`, :const:`LOG_NOWAIT`, and
+ :const:`LOG_PERROR`.
Examples
diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
index 46e4900..86dd33d 100644
--- a/Doc/library/tarfile.rst
+++ b/Doc/library/tarfile.rst
@@ -13,13 +13,13 @@
--------------
The :mod:`tarfile` module makes it possible to read and write tar
-archives, including those using gzip or bz2 compression.
+archives, including those using gzip, bz2 and lzma compression.
Use the :mod:`zipfile` module to read or write :file:`.zip` files, or the
higher-level functions in :ref:`shutil <archiving-operations>`.
Some facts and figures:
-* reads and writes :mod:`gzip` and :mod:`bz2` compressed archives.
+* reads and writes :mod:`gzip`, :mod:`bz2` and :mod:`lzma` compressed archives.
* read/write support for the POSIX.1-1988 (ustar) format.
@@ -33,6 +33,9 @@ Some facts and figures:
character devices and block devices and is able to acquire and restore file
information like timestamp, access permissions and owner.
+.. versionchanged:: 3.3
+ Added support for :mod:`lzma` compression.
+
.. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, \*\*kwargs)
@@ -56,6 +59,8 @@ Some facts and figures:
+------------------+---------------------------------------------+
| ``'r:bz2'`` | Open for reading with bzip2 compression. |
+------------------+---------------------------------------------+
+ | ``'r:xz'`` | Open for reading with lzma compression. |
+ +------------------+---------------------------------------------+
| ``'a' or 'a:'`` | Open for appending with no compression. The |
| | file is created if it does not exist. |
+------------------+---------------------------------------------+
@@ -65,11 +70,13 @@ Some facts and figures:
+------------------+---------------------------------------------+
| ``'w:bz2'`` | Open for bzip2 compressed writing. |
+------------------+---------------------------------------------+
+ | ``'w:xz'`` | Open for lzma compressed writing. |
+ +------------------+---------------------------------------------+
- Note that ``'a:gz'`` or ``'a:bz2'`` is not possible. If *mode* is not suitable
- to open a certain (compressed) file for reading, :exc:`ReadError` is raised. Use
- *mode* ``'r'`` to avoid this. If a compression method is not supported,
- :exc:`CompressionError` is raised.
+ Note that ``'a:gz'``, ``'a:bz2'`` or ``'a:xz'`` is not possible. If *mode*
+ is not suitable to open a certain (compressed) file for reading,
+ :exc:`ReadError` is raised. Use *mode* ``'r'`` to avoid this. If a
+ compression method is not supported, :exc:`CompressionError` is raised.
If *fileobj* is specified, it is used as an alternative to a :term:`file object`
opened in binary mode for *name*. It is supposed to be at position 0.
@@ -100,6 +107,9 @@ Some facts and figures:
| ``'r|bz2'`` | Open a bzip2 compressed *stream* for |
| | reading. |
+-------------+--------------------------------------------+
+ | ``'r|xz'`` | Open an lzma compressed *stream* for |
+ | | reading. |
+ +-------------+--------------------------------------------+
| ``'w|'`` | Open an uncompressed *stream* for writing. |
+-------------+--------------------------------------------+
| ``'w|gz'`` | Open a gzip compressed *stream* for |
@@ -108,6 +118,9 @@ Some facts and figures:
| ``'w|bz2'`` | Open a bzip2 compressed *stream* for |
| | writing. |
+-------------+--------------------------------------------+
+ | ``'w|xz'`` | Open an lzma compressed *stream* for |
+ | | writing. |
+ +-------------+--------------------------------------------+
.. class:: TarFile
@@ -263,9 +276,9 @@ be finalized; only the internally used file object will be closed. See the
If *errorlevel* is ``0``, all errors are ignored when using :meth:`TarFile.extract`.
Nevertheless, they appear as error messages in the debug output, when debugging
- is enabled. If ``1``, all *fatal* errors are raised as :exc:`OSError` or
- :exc:`IOError` exceptions. If ``2``, all *non-fatal* errors are raised as
- :exc:`TarError` exceptions as well.
+ is enabled. If ``1``, all *fatal* errors are raised as :exc:`OSError`
+ exceptions. If ``2``, all *non-fatal* errors are raised as :exc:`TarError`
+ exceptions as well.
The *encoding* and *errors* arguments define the character encoding to be
used for reading or writing the archive and how conversion errors are going
@@ -363,15 +376,12 @@ be finalized; only the internally used file object will be closed. See the
.. method:: TarFile.extractfile(member)
Extract a member from the archive as a file object. *member* may be a filename
- or a :class:`TarInfo` object. If *member* is a regular file, a :term:`file-like
- object` is returned. If *member* is a link, a file-like object is constructed from
- the link's target. If *member* is none of the above, :const:`None` is returned.
-
- .. note::
+ or a :class:`TarInfo` object. If *member* is a regular file or a link, an
+ :class:`io.BufferedReader` object is returned. Otherwise, :const:`None` is
+ returned.
- The file-like object is read-only. It provides the methods
- :meth:`read`, :meth:`readline`, :meth:`readlines`, :meth:`seek`, :meth:`tell`,
- and :meth:`close`, and also supports iteration over its lines.
+ .. versionchanged:: 3.3
+ Return an :class:`io.BufferedReader` object.
.. method:: TarFile.add(name, arcname=None, recursive=True, exclude=None, *, filter=None)
diff --git a/Doc/library/telnetlib.rst b/Doc/library/telnetlib.rst
index 646634d..9bc79c5 100644
--- a/Doc/library/telnetlib.rst
+++ b/Doc/library/telnetlib.rst
@@ -162,9 +162,13 @@ Telnet Objects
.. method:: Telnet.write(buffer)
Write a byte string to the socket, doubling any IAC characters. This can
- block if the connection is blocked. May raise :exc:`socket.error` if the
+ block if the connection is blocked. May raise :exc:`OSError` if the
connection is closed.
+ .. versionchanged:: 3.3
+ This method used to raise :exc:`socket.error`, which is now an alias
+ of :exc:`OSError`.
+
.. method:: Telnet.interact()
diff --git a/Doc/library/tempfile.rst b/Doc/library/tempfile.rst
index c143b88..96ead1f 100644
--- a/Doc/library/tempfile.rst
+++ b/Doc/library/tempfile.rst
@@ -25,7 +25,7 @@ instead a string of six random characters is used.
Also, all the user-callable functions now take additional arguments which
allow direct control over the location and name of temporary files. It is
-no longer necessary to use the global *tempdir* and *template* variables.
+no longer necessary to use the global *tempdir* variable.
To maintain backward compatibility, the argument order is somewhat odd; it
is recommended to use keyword arguments for clarity.
@@ -76,8 +76,7 @@ The module defines the following user-callable items:
data is spooled in memory until the file size exceeds *max_size*, or
until the file's :func:`fileno` method is called, at which point the
contents are written to disk and operation proceeds as with
- :func:`TemporaryFile`. Also, it's ``truncate`` method does not
- accept a ``size`` argument.
+ :func:`TemporaryFile`.
The resulting file has one additional method, :func:`rollover`, which
causes the file to roll over to an on-disk file regardless of its size.
@@ -87,6 +86,9 @@ The module defines the following user-callable items:
whether :func:`rollover` has been called. This file-like object can be
used in a :keyword:`with` statement, just like a normal file.
+ .. versionchanged:: 3.3
+ The truncate method now accepts a ``size`` argument.
+
.. function:: TemporaryDirectory(suffix='', prefix='tmp', dir=None)
diff --git a/Doc/library/test.rst b/Doc/library/test.rst
index 40f4115..2e8ba32 100644
--- a/Doc/library/test.rst
+++ b/Doc/library/test.rst
@@ -80,17 +80,12 @@ A basic boilerplate is often used::
... more test classes ...
- def test_main():
- support.run_unittest(MyTestCase1,
- MyTestCase2,
- ... list other tests ...
- )
-
if __name__ == '__main__':
- test_main()
+ unittest.main()
-This boilerplate code allows the testing suite to be run by :mod:`test.regrtest`
-as well as on its own as a script.
+This code pattern allows the testing suite to be run by :mod:`test.regrtest`,
+on its own as a script that supports the :mod:`unittest` CLI, or via the
+``python -m unittest`` CLI.
The goal for regression testing is to try to break code. This leads to a few
guidelines to be followed:
@@ -129,22 +124,27 @@ guidelines to be followed:
as what type of input is used. Minimize code duplication by subclassing a
basic test class with a class that specifies the input::
- class TestFuncAcceptsSequences(unittest.TestCase):
+ class TestFuncAcceptsSequencesMixin:
func = mySuperWhammyFunction
def test_func(self):
self.func(self.arg)
- class AcceptLists(TestFuncAcceptsSequences):
+ class AcceptLists(TestFuncAcceptsSequencesMixin, unittest.TestCase):
arg = [1, 2, 3]
- class AcceptStrings(TestFuncAcceptsSequences):
+ class AcceptStrings(TestFuncAcceptsSequencesMixin, unittest.TestCase):
arg = 'abc'
- class AcceptTuples(TestFuncAcceptsSequences):
+ class AcceptTuples(TestFuncAcceptsSequencesMixin, unittest.TestCase):
arg = (1, 2, 3)
+ When using this pattern, remember that all classes that inherit from
+ :class:`unittest.TestCase` are run as tests. The ``Mixin`` class in the
+ example above does not have any data and so can't be run by itself, thus it
+ does not inherit from :class:`unittest.TestCase`.
+
.. seealso::
@@ -160,14 +160,15 @@ Running tests using the command-line interface
The :mod:`test` package can be run as a script to drive Python's regression
test suite, thanks to the :option:`-m` option: :program:`python -m test`. Under
the hood, it uses :mod:`test.regrtest`; the call :program:`python -m
-test.regrtest` used in previous Python versions still works).
-Running the script by itself automatically starts running all regression
-tests in the :mod:`test` package. It does this by finding all modules in the
-package whose name starts with ``test_``, importing them, and executing the
-function :func:`test_main` if present. The names of tests to execute may also
-be passed to the script. Specifying a single regression test (:program:`python
--m test test_spam`) will minimize output and only print
-whether the test passed or failed and thus minimize output.
+test.regrtest` used in previous Python versions still works. Running the
+script by itself automatically starts running all regression tests in the
+:mod:`test` package. It does this by finding all modules in the package whose
+name starts with ``test_``, importing them, and executing the function
+:func:`test_main` if present or loading the tests via
+:meth:`unittest.TestLoader.loadTestsFromModule` if ``test_main`` does not exist. The
+names of tests to execute may also be passed to the script. Specifying a single
+regression test (:program:`python -m test test_spam`) will minimize output and
+only print whether the test passed or failed.
Running :mod:`test` directly allows what resources are available for
tests to use to be set. You do this by using the ``-u`` command-line
@@ -223,14 +224,14 @@ The :mod:`test.support` module defines the following constants:
.. data:: verbose
- :const:`True` when verbose output is enabled. Should be checked when more
+ ``True`` when verbose output is enabled. Should be checked when more
detailed information is desired about a running test. *verbose* is set by
:mod:`test.regrtest`.
.. data:: is_jython
- :const:`True` if the running interpreter is Jython.
+ ``True`` if the running interpreter is Jython.
.. data:: TESTFN
@@ -249,7 +250,7 @@ The :mod:`test.support` module defines the following functions:
.. function:: is_resource_enabled(resource)
- Return :const:`True` if *resource* is enabled and available. The list of
+ Return ``True`` if *resource* is enabled and available. The list of
available resources is only set when :mod:`test.regrtest` is executing the
tests.
@@ -258,7 +259,7 @@ The :mod:`test.support` module defines the following functions:
Raise :exc:`ResourceDenied` if *resource* is not available. *msg* is the
argument to :exc:`ResourceDenied` if it is raised. Always returns
- :const:`True` if called by a function whose ``__name__`` is ``'__main__'``.
+ ``True`` if called by a function whose ``__name__`` is ``'__main__'``.
Used when tests are executed by :mod:`test.regrtest`.
@@ -286,6 +287,15 @@ The :mod:`test.support` module defines the following functions:
This will run all tests defined in the named module.
+.. function:: run_doctest(module, verbosity=None)
+
+ Run :func:`doctest.testmod` on the given *module*. Return
+ ``(failure_count, test_count)``.
+
+ If *verbosity* is ``None``, :func:`doctest.testmod` is run with verbosity
+ set to :data:`verbose`. Otherwise, it is run with verbosity set to
+ ``None``.
+
.. function:: check_warnings(\*filters, quiet=True)
A convenience wrapper for :func:`warnings.catch_warnings()` that makes it
@@ -296,12 +306,12 @@ The :mod:`test.support` module defines the following functions:
``check_warnings`` accepts 2-tuples of the form ``("message regexp",
WarningCategory)`` as positional arguments. If one or more *filters* are
- provided, or if the optional keyword argument *quiet* is :const:`False`,
+ provided, or if the optional keyword argument *quiet* is ``False``,
it checks to make sure the warnings are as expected: each specified filter
must match at least one of the warnings raised by the enclosed code or the
test fails, and if any warnings are raised that do not match any of the
specified filters the test fails. To disable the first of these checks,
- set *quiet* to :const:`True`.
+ set *quiet* to ``True``.
If no arguments are specified, it defaults to::
@@ -316,7 +326,7 @@ The :mod:`test.support` module defines the following functions:
representing the most recent warning can also be accessed directly through
the recorder object (see example below). If no warning has been raised,
then any of the attributes that would otherwise be expected on an object
- representing a warning will return :const:`None`.
+ representing a warning will return ``None``.
The recorder object also has a :meth:`reset` method, which clears the
warnings list.
@@ -354,7 +364,7 @@ The :mod:`test.support` module defines the following functions:
.. function:: captured_stdout()
- This is a context manager that runs the :keyword:`with` statement body using
+ A context manager that runs the :keyword:`with` statement body using
a :class:`io.StringIO` object as sys.stdout. That object can be
retrieved using the ``as`` clause of the :keyword:`with` statement.
@@ -365,6 +375,57 @@ The :mod:`test.support` module defines the following functions:
assert s.getvalue() == "hello\n"
+.. function:: temp_cwd(name='tempcwd', quiet=False, path=None)
+
+ A context manager that temporarily changes the current working
+ directory (CWD).
+
+ An existing path may be provided as *path*, in which case this function
+ makes no changes to the file system.
+
+ Otherwise, the new CWD is created in the current directory and it's named
+ *name*. If *quiet* is ``False`` and it's not possible to create or
+ change the CWD, an error is raised. If it's ``True``, only a warning
+ is raised and the original CWD is used.
+
+
+.. function:: temp_umask(umask)
+
+ A context manager that temporarily sets the process umask.
+
+
+.. function:: can_symlink()
+
+ Return ``True`` if the OS supports symbolic links, ``False``
+ otherwise.
+
+
+.. decorator:: skip_unless_symlink()
+
+ A decorator for running tests that require support for symbolic links.
+
+
+.. decorator:: anticipate_failure(condition)
+
+ A decorator to conditionally mark tests with
+ :func:`unittest.expectedFailure`. Any use of this decorator should
+ have an associated comment identifying the relevant tracker issue.
+
+
+.. decorator:: run_with_locale(catstr, *locales)
+
+ A decorator for running a function in a different locale, correctly
+ resetting it after it has finished. *catstr* is the locale category as
+ a string (for example ``"LC_ALL"``). The *locales* passed will be tried
+ sequentially, and the first valid locale will be used.
+
+
+.. function:: make_bad_fd()
+
+ Create an invalid file descriptor by opening and closing a temporary file,
+ and returning its descriptor.
+
+
.. function:: import_module(name, deprecated=False)
This function imports and returns the named module. Unlike a normal
@@ -372,7 +433,7 @@ The :mod:`test.support` module defines the following functions:
cannot be imported.
Module and package deprecation messages are suppressed during this import
- if *deprecated* is :const:`True`.
+ if *deprecated* is ``True``.
.. versionadded:: 3.1
@@ -396,9 +457,9 @@ The :mod:`test.support` module defines the following functions:
``sys.modules`` when the fresh import is complete.
Module and package deprecation messages are suppressed during this import
- if *deprecated* is :const:`True`.
+ if *deprecated* is ``True``.
- This function will raise :exc:`unittest.SkipTest` is the named module
+ This function will raise :exc:`unittest.SkipTest` if the named module
cannot be imported.
Example use::
@@ -413,6 +474,48 @@ The :mod:`test.support` module defines the following functions:
.. versionadded:: 3.1
+.. function:: bind_port(sock, host=HOST)
+
+ Bind the socket to a free port and return the port number. Relies on
+ ephemeral ports in order to ensure we are using an unbound port. This is
+ important as many tests may be running simultaneously, especially in a
+ buildbot environment. This method raises an exception if the
+ ``sock.family`` is :const:`~socket.AF_INET` and ``sock.type`` is
+ :const:`~socket.SOCK_STREAM`, and the socket has
+ :const:`~socket.SO_REUSEADDR` or :const:`~socket.SO_REUSEPORT` set on it.
+ Tests should never set these socket options for TCP/IP sockets.
+ The only case for setting these options is testing multicasting via
+ multiple UDP sockets.
+
+ Additionally, if the :const:`~socket.SO_EXCLUSIVEADDRUSE` socket option is
+ available (i.e. on Windows), it will be set on the socket. This will
+ prevent anyone else from binding to our host/port for the duration of the
+ test.
+
+
+.. function:: find_unused_port(family=socket.AF_INET, socktype=socket.SOCK_STREAM)
+
+ Returns an unused port that should be suitable for binding. This is
+ achieved by creating a temporary socket with the same family and type as
+ the ``sock`` parameter (default is :const:`~socket.AF_INET`,
+ :const:`~socket.SOCK_STREAM`),
+ and binding it to the specified host address (defaults to ``0.0.0.0``)
+ with the port set to 0, eliciting an unused ephemeral port from the OS.
+ The temporary socket is then closed and deleted, and the ephemeral port is
+ returned.
+
+ Either this method or :func:`bind_port` should be used for any tests
+ where a server socket needs to be bound to a particular port for the
+ duration of the test.
+ Which one to use depends on whether the calling code is creating a python
+ socket, or if an unused port needs to be provided in a constructor
+ or passed to an external program (e.g. the ``-accept`` argument to
+ openssl's s_server mode). Always prefer :func:`bind_port` over
+ :func:`find_unused_port` where possible. Using a hard coded port is
+ discouraged since it can make multiple instances of the test impossible to
+ run simultaneously, which is a problem for buildbots.
+
+
The :mod:`test.support` module defines the following classes:
.. class:: TransientResource(exc, **kwargs)
diff --git a/Doc/library/text.rst b/Doc/library/text.rst
new file mode 100644
index 0000000..47b6784
--- /dev/null
+++ b/Doc/library/text.rst
@@ -0,0 +1,26 @@
+.. _stringservices:
+.. _textservices:
+
+************************
+Text Processing Services
+************************
+
+The modules described in this chapter provide a wide range of string
+manipulation operations and other text processing services.
+
+The :mod:`codecs` module described under :ref:`binaryservices` is also
+highly relevant to text processing. In addition, see the documentation for
+Python's built-in string type in :ref:`textseq`.
+
+
+.. toctree::
+
+ string.rst
+ re.rst
+ difflib.rst
+ textwrap.rst
+ unicodedata.rst
+ stringprep.rst
+ readline.rst
+ rlcompleter.rst
+
diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
index 3c1ecf6..c625254 100644
--- a/Doc/library/textwrap.rst
+++ b/Doc/library/textwrap.rst
@@ -12,7 +12,7 @@
The :mod:`textwrap` module provides two convenience functions, :func:`wrap` and
:func:`fill`, as well as :class:`TextWrapper`, the class that does all the work,
-and a utility function :func:`dedent`. If you're just wrapping or filling one
+and two utility functions, :func:`dedent` and :func:`indent`. If you're just wrapping or filling one
or two text strings, the convenience functions should be good enough;
otherwise, you should use an instance of :class:`TextWrapper` for efficiency.
@@ -48,9 +48,10 @@ Text is preferably wrapped on whitespaces and right after the hyphens in
hyphenated words; only then will long words be broken if necessary, unless
:attr:`TextWrapper.break_long_words` is set to false.
-An additional utility function, :func:`dedent`, is provided to remove
-indentation from strings that have unwanted whitespace to the left of the text.
-
+Two additional utility functions, :func:`dedent` and :func:`indent`, are
+provided to remove indentation from strings that have unwanted whitespace
+to the left of the text and to add an arbitrary prefix to selected lines
+in a block of text.
.. function:: dedent(text)
@@ -75,6 +76,32 @@ indentation from strings that have unwanted whitespace to the left of the text.
print(repr(dedent(s))) # prints 'hello\n world\n'
+.. function:: indent(text, prefix, predicate=None)
+
+ Add *prefix* to the beginning of selected lines in *text*.
+
+ Lines are separated by calling ``text.splitlines(True)``.
+
+ By default, *prefix* is added to all lines that do not consist
+ solely of whitespace (including any line endings).
+
+ For example::
+
+ >>> s = 'hello\n\n \nworld'
+ >>> indent(s, ' ')
+ ' hello\n\n \n world'
+
+ The optional *predicate* argument can be used to control which lines
+ are indented. For example, it is easy to add *prefix* to even empty
+ and whitespace-only lines::
+
+ >>> print(indent(s, '+ ', lambda line: True))
+ + hello
+ +
+ +
+ + world
+
+
.. class:: TextWrapper(**kwargs)
The :class:`TextWrapper` constructor accepts a number of optional keyword
@@ -110,6 +137,15 @@ indentation from strings that have unwanted whitespace to the left of the text.
expanded to spaces using the :meth:`expandtabs` method of *text*.
+ .. attribute:: tabsize
+
+ (default: ``8``) If :attr:`expand_tabs` is true, then all tab characters
+ in *text* will be expanded to zero or more spaces, depending on the
+ current column and the given tab size.
+
+ .. versionadded:: 3.3
+
+
.. attribute:: replace_whitespace
(default: ``True``) If true, after tab expansion but before wrapping,
diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst
index 43087ef..05863a0 100644
--- a/Doc/library/threading.rst
+++ b/Doc/library/threading.rst
@@ -20,19 +20,8 @@ The :mod:`dummy_threading` module is provided for situations where
methods and functions in this module in the Python 2.x series are still
supported by this module.
-.. impl-detail::
-
- Due to the :term:`Global Interpreter Lock`, in CPython only one thread
- can execute Python code at once (even though certain performance-oriented
- libraries might overcome this limitation).
- If you want your application to make better of use of the computational
- resources of multi-core machines, you are advised to use
- :mod:`multiprocessing` or :class:`concurrent.futures.ProcessPoolExecutor`.
- However, threading is still an appropriate model if you want to run
- multiple I/O-bound tasks simultaneously.
-
-This module defines the following functions and objects:
+This module defines the following functions:
.. function:: active_count()
@@ -41,16 +30,6 @@ This module defines the following functions and objects:
count is equal to the length of the list returned by :func:`.enumerate`.
-.. function:: Condition()
- :noindex:
-
- A factory function that returns a new condition variable object. A condition
- variable allows one or more threads to wait until they are notified by another
- thread.
-
- See :ref:`condition-objects`.
-
-
.. function:: current_thread()
Return the current :class:`Thread` object, corresponding to the caller's thread
@@ -59,6 +38,17 @@ This module defines the following functions and objects:
returned.
+.. function:: get_ident()
+
+ Return the 'thread identifier' of the current thread. This is a nonzero
+ integer. Its value has no direct meaning; it is intended as a magic cookie
+ to be used e.g. to index a dictionary of thread-specific data. Thread
+ identifiers may be recycled when a thread exits and another thread is
+ created.
+
+ .. versionadded:: 3.3
+
+
.. function:: enumerate()
Return a list of all :class:`Thread` objects currently alive. The list
@@ -67,89 +57,6 @@ This module defines the following functions and objects:
and threads that have not yet been started.
-.. function:: Event()
- :noindex:
-
- A factory function that returns a new event object. An event manages a flag
- that can be set to true with the :meth:`~Event.set` method and reset to false
- with the :meth:`clear` method. The :meth:`wait` method blocks until the flag
- is true.
-
- See :ref:`event-objects`.
-
-
-.. class:: local
-
- A class that represents thread-local data. Thread-local data are data whose
- values are thread specific. To manage thread-local data, just create an
- instance of :class:`local` (or a subclass) and store attributes on it::
-
- mydata = threading.local()
- mydata.x = 1
-
- The instance's values will be different for separate threads.
-
- For more details and extensive examples, see the documentation string of the
- :mod:`_threading_local` module.
-
-
-.. function:: Lock()
-
- A factory function that returns a new primitive lock object. Once a thread has
- acquired it, subsequent attempts to acquire it block, until it is released; any
- thread may release it.
-
- See :ref:`lock-objects`.
-
-
-.. function:: RLock()
-
- A factory function that returns a new reentrant lock object. A reentrant lock
- must be released by the thread that acquired it. Once a thread has acquired a
- reentrant lock, the same thread may acquire it again without blocking; the
- thread must release it once for each time it has acquired it.
-
- See :ref:`rlock-objects`.
-
-
-.. function:: Semaphore(value=1)
- :noindex:
-
- A factory function that returns a new semaphore object. A semaphore manages a
- counter representing the number of :meth:`release` calls minus the number of
- :meth:`acquire` calls, plus an initial value. The :meth:`acquire` method blocks
- if necessary until it can return without making the counter negative. If not
- given, *value* defaults to 1.
-
- See :ref:`semaphore-objects`.
-
-
-.. function:: BoundedSemaphore(value=1)
-
- A factory function that returns a new bounded semaphore object. A bounded
- semaphore checks to make sure its current value doesn't exceed its initial
- value. If it does, :exc:`ValueError` is raised. In most situations semaphores
- are used to guard resources with limited capacity. If the semaphore is released
- too many times it's a sign of a bug. If not given, *value* defaults to 1.
-
-
-.. class:: Thread
- :noindex:
-
- A class that represents a thread of control. This class can be safely
- subclassed in a limited fashion.
-
- See :ref:`thread-objects`.
-
-
-.. class:: Timer
- :noindex:
-
- A thread that executes a function after a specified interval has passed.
-
- See :ref:`timer-objects`.
-
-
.. function:: settrace(func)
.. index:: single: trace function
@@ -174,7 +81,7 @@ This module defines the following functions and objects:
*size* argument specifies the stack size to be used for subsequently created
threads, and must be 0 (use platform or configured default) or a positive
integer value of at least 32,768 (32kB). If changing the thread stack size is
- unsupported, a :exc:`ThreadError` is raised. If the specified stack size is
+ unsupported, a :exc:`RuntimeError` is raised. If the specified stack size is
invalid, a :exc:`ValueError` is raised and the stack size is unmodified. 32kB
is currently the minimum supported stack size value to guarantee sufficient
stack space for the interpreter itself. Note that some platforms may have
@@ -198,7 +105,8 @@ This module also defines the following constant:
.. versionadded:: 3.2
-Detailed interfaces for the objects are documented below.
+This module defines a number of classes, which are detailed in the sections
+below.
The design of this module is loosely based on Java's threading model. However,
where Java makes locks and condition variables basic behavior of every object,
@@ -211,17 +119,38 @@ when implemented, are mapped to module-level functions.
All of the methods described below are executed atomically.
+Thread-Local Data
+-----------------
+
+Thread-local data is data whose values are thread specific. To manage
+thread-local data, just create an instance of :class:`local` (or a
+subclass) and store attributes on it::
+
+ mydata = threading.local()
+ mydata.x = 1
+
+The instance's values will be different for separate threads.
+
+
+.. class:: local()
+
+ A class that represents thread-local data.
+
+ For more details and extensive examples, see the documentation string of the
+ :mod:`_threading_local` module.
+
+
.. _thread-objects:
Thread Objects
--------------
-This class represents an activity that is run in a separate thread of control.
-There are two ways to specify the activity: by passing a callable object to the
-constructor, or by overriding the :meth:`~Thread.run` method in a subclass.
-No other methods (except for the constructor) should be overridden in a
-subclass. In other words, *only* override the :meth:`~Thread.__init__`
-and :meth:`~Thread.run` methods of this class.
+The :class:`Thread` class represents an activity that is run in a separate
+thread of control. There are two ways to specify the activity: by passing a
+callable object to the constructor, or by overriding the :meth:`~Thread.run`
+method in a subclass. No other methods (except for the constructor) should be
+overridden in a subclass. In other words, *only* override the
+:meth:`~Thread.__init__` and :meth:`~Thread.run` methods of this class.
Once a thread object is created, its activity must be started by calling the
thread's :meth:`~Thread.start` method. This invokes the :meth:`~Thread.run`
@@ -239,10 +168,11 @@ called is terminated.
A thread has a name. The name can be passed to the constructor, and read or
changed through the :attr:`~Thread.name` attribute.
-A thread can be flagged as a "daemon thread". The significance of this flag
-is that the entire Python program exits when only daemon threads are left.
-The initial value is inherited from the creating thread. The flag can be
-set through the :attr:`~Thread.daemon` property.
+A thread can be flagged as a "daemon thread". The significance of this flag is
+that the entire Python program exits when only daemon threads are left. The
+initial value is inherited from the creating thread. The flag can be set
+through the :attr:`~Thread.daemon` property or the *daemon* constructor
+argument.
There is a "main thread" object; this corresponds to the initial thread of
control in the Python program. It is not a daemon thread.
@@ -255,7 +185,8 @@ daemonic, and cannot be :meth:`~Thread.join`\ ed. They are never deleted,
since it is impossible to detect the termination of alien threads.
-.. class:: Thread(group=None, target=None, name=None, args=(), kwargs={})
+.. class:: Thread(group=None, target=None, name=None, args=(), kwargs={}, *, \
+ daemon=None)
This constructor should always be called with keyword arguments. Arguments
are:
@@ -274,10 +205,17 @@ since it is impossible to detect the termination of alien threads.
*kwargs* is a dictionary of keyword arguments for the target invocation.
Defaults to ``{}``.
+ If not ``None``, *daemon* explicitly sets whether the thread is daemonic.
+ If ``None`` (the default), the daemonic property is inherited from the
+ current thread.
+
If the subclass overrides the constructor, it must make sure to invoke the
base class constructor (``Thread.__init__()``) before doing anything else to
the thread.
+ .. versionchanged:: 3.3
+ Added the *daemon* argument.
+
.. method:: start()
Start the thread's activity.
@@ -368,6 +306,18 @@ since it is impossible to detect the termination of alien threads.
property instead.
+.. impl-detail::
+
+ Due to the :term:`Global Interpreter Lock`, in CPython only one thread
+ can execute Python code at once (even though certain performance-oriented
+ libraries might overcome this limitation).
+ If you want your application to make better use of the computational
+ resources of multi-core machines, you are advised to use
+ :mod:`multiprocessing` or :class:`concurrent.futures.ProcessPoolExecutor`.
+ However, threading is still an appropriate model if you want to run
+ multiple I/O-bound tasks simultaneously.
+
+
.. _lock-objects:
Lock Objects
@@ -399,45 +349,55 @@ is not defined, and may vary across implementations.
All methods are executed atomically.
-.. method:: Lock.acquire(blocking=True, timeout=-1)
+.. class:: Lock()
+
+ The class implementing primitive lock objects. Once a thread has acquired a
+ lock, subsequent attempts to acquire it block, until it is released; any
+ thread may release it.
+
+ .. versionchanged:: 3.3
+ Changed from a factory function to a class.
+
- Acquire a lock, blocking or non-blocking.
+ .. method:: acquire(blocking=True, timeout=-1)
- When invoked with the *blocking* argument set to ``True`` (the default),
- block until the lock is unlocked, then set it to locked and return ``True``.
+ Acquire a lock, blocking or non-blocking.
- When invoked with the *blocking* argument set to ``False``, do not block.
- If a call with *blocking* set to ``True`` would block, return ``False``
- immediately; otherwise, set the lock to locked and return ``True``.
+ When invoked with the *blocking* argument set to ``True`` (the default),
+ block until the lock is unlocked, then set it to locked and return ``True``.
- When invoked with the floating-point *timeout* argument set to a positive
- value, block for at most the number of seconds specified by *timeout*
- and as long as the lock cannot be acquired. A negative *timeout* argument
- specifies an unbounded wait. It is forbidden to specify a *timeout*
- when *blocking* is false.
+ When invoked with the *blocking* argument set to ``False``, do not block.
+ If a call with *blocking* set to ``True`` would block, return ``False``
+ immediately; otherwise, set the lock to locked and return ``True``.
- The return value is ``True`` if the lock is acquired successfully,
- ``False`` if not (for example if the *timeout* expired).
+ When invoked with the floating-point *timeout* argument set to a positive
+ value, block for at most the number of seconds specified by *timeout*
+ and as long as the lock cannot be acquired. A negative *timeout* argument
+ specifies an unbounded wait. It is forbidden to specify a *timeout*
+ when *blocking* is false.
- .. versionchanged:: 3.2
- The *timeout* parameter is new.
+ The return value is ``True`` if the lock is acquired successfully,
+ ``False`` if not (for example if the *timeout* expired).
- .. versionchanged:: 3.2
- Lock acquires can now be interrupted by signals on POSIX.
+ .. versionchanged:: 3.2
+ The *timeout* parameter is new.
+
+ .. versionchanged:: 3.2
+ Lock acquires can now be interrupted by signals on POSIX.
-.. method:: Lock.release()
+ .. method:: release()
- Release a lock. This can be called from any thread, not only the thread
- which has acquired the lock.
+ Release a lock. This can be called from any thread, not only the thread
+ which has acquired the lock.
- When the lock is locked, reset it to unlocked, and return. If any other threads
- are blocked waiting for the lock to become unlocked, allow exactly one of them
- to proceed.
+ When the lock is locked, reset it to unlocked, and return. If any other threads
+ are blocked waiting for the lock to become unlocked, allow exactly one of them
+ to proceed.
- When invoked on an unlocked lock, a :exc:`ThreadError` is raised.
+ When invoked on an unlocked lock, a :exc:`RuntimeError` is raised.
- There is no return value.
+ There is no return value.
.. _rlock-objects:
@@ -461,47 +421,59 @@ allows another thread blocked in :meth:`~Lock.acquire` to proceed.
Reentrant locks also support the :ref:`context manager protocol <with-locks>`.
-.. method:: RLock.acquire(blocking=True, timeout=-1)
+.. class:: RLock()
- Acquire a lock, blocking or non-blocking.
+ This class implements reentrant lock objects. A reentrant lock must be
+ released by the thread that acquired it. Once a thread has acquired a
+ reentrant lock, the same thread may acquire it again without blocking; the
+ thread must release it once for each time it has acquired it.
- When invoked without arguments: if this thread already owns the lock, increment
- the recursion level by one, and return immediately. Otherwise, if another
- thread owns the lock, block until the lock is unlocked. Once the lock is
- unlocked (not owned by any thread), then grab ownership, set the recursion level
- to one, and return. If more than one thread is blocked waiting until the lock
- is unlocked, only one at a time will be able to grab ownership of the lock.
- There is no return value in this case.
+ Note that ``RLock`` is actually a factory function which returns an instance
+ of the most efficient version of the concrete RLock class that is supported
+ by the platform.
- When invoked with the *blocking* argument set to true, do the same thing as when
- called without arguments, and return true.
- When invoked with the *blocking* argument set to false, do not block. If a call
- without an argument would block, return false immediately; otherwise, do the
- same thing as when called without arguments, and return true.
+ .. method:: acquire(blocking=True, timeout=-1)
- When invoked with the floating-point *timeout* argument set to a positive
- value, block for at most the number of seconds specified by *timeout*
- and as long as the lock cannot be acquired. Return true if the lock has
- been acquired, false if the timeout has elapsed.
+ Acquire a lock, blocking or non-blocking.
- .. versionchanged:: 3.2
- The *timeout* parameter is new.
+ When invoked without arguments: if this thread already owns the lock, increment
+ the recursion level by one, and return immediately. Otherwise, if another
+ thread owns the lock, block until the lock is unlocked. Once the lock is
+ unlocked (not owned by any thread), then grab ownership, set the recursion level
+ to one, and return. If more than one thread is blocked waiting until the lock
+ is unlocked, only one at a time will be able to grab ownership of the lock.
+ There is no return value in this case.
+ When invoked with the *blocking* argument set to true, do the same thing as when
+ called without arguments, and return true.
-.. method:: RLock.release()
+ When invoked with the *blocking* argument set to false, do not block. If a call
+ without an argument would block, return false immediately; otherwise, do the
+ same thing as when called without arguments, and return true.
- Release a lock, decrementing the recursion level. If after the decrement it is
- zero, reset the lock to unlocked (not owned by any thread), and if any other
- threads are blocked waiting for the lock to become unlocked, allow exactly one
- of them to proceed. If after the decrement the recursion level is still
- nonzero, the lock remains locked and owned by the calling thread.
+ When invoked with the floating-point *timeout* argument set to a positive
+ value, block for at most the number of seconds specified by *timeout*
+ and as long as the lock cannot be acquired. Return true if the lock has
+ been acquired, false if the timeout has elapsed.
- Only call this method when the calling thread owns the lock. A
- :exc:`RuntimeError` is raised if this method is called when the lock is
- unlocked.
+ .. versionchanged:: 3.2
+ The *timeout* parameter is new.
+
+
+ .. method:: release()
- There is no return value.
+ Release a lock, decrementing the recursion level. If after the decrement it is
+ zero, reset the lock to unlocked (not owned by any thread), and if any other
+ threads are blocked waiting for the lock to become unlocked, allow exactly one
+ of them to proceed. If after the decrement the recursion level is still
+ nonzero, the lock remains locked and owned by the calling thread.
+
+ Only call this method when the calling thread owns the lock. A
+ :exc:`RuntimeError` is raised if this method is called when the lock is
+ unlocked.
+
+ There is no return value.
.. _condition-objects:
@@ -536,10 +508,6 @@ not return from their :meth:`~Condition.wait` call immediately, but only when
the thread that called :meth:`~Condition.notify` or :meth:`~Condition.notify_all`
finally relinquishes ownership of the lock.
-
-Usage
-^^^^^
-
The typical programming style using condition variables uses the lock to
synchronize access to some shared state; threads that are interested in a
particular change of state call :meth:`~Condition.wait` repeatedly until they
@@ -578,15 +546,18 @@ waiting threads. E.g. in a typical producer-consumer situation, adding one
item to the buffer only needs to wake up one consumer thread.
-Interface
-^^^^^^^^^
-
.. class:: Condition(lock=None)
+ This class implements condition variable objects. A condition variable
+ allows one or more threads to wait until they are notified by another thread.
+
If the *lock* argument is given and not ``None``, it must be a :class:`Lock`
or :class:`RLock` object, and it is used as the underlying lock. Otherwise,
a new :class:`RLock` object is created and used as the underlying lock.
+ .. versionchanged:: 3.3
+ Changed from a factory function to a class.
+
.. method:: acquire(*args)
Acquire the underlying lock. This method calls the corresponding method on
@@ -696,10 +667,19 @@ Semaphores also support the :ref:`context manager protocol <with-locks>`.
.. class:: Semaphore(value=1)
+ This class implements semaphore objects. A semaphore manages a counter
+ representing the number of :meth:`release` calls minus the number of
+ :meth:`acquire` calls, plus an initial value. The :meth:`acquire` method
+ blocks if necessary until it can return without making the counter negative.
+ If not given, *value* defaults to 1.
+
The optional argument gives the initial *value* for the internal counter; it
defaults to ``1``. If the *value* given is less than 0, :exc:`ValueError` is
raised.
+ .. versionchanged:: 3.3
+ Changed from a factory function to a class.
+
.. method:: acquire(blocking=True, timeout=None)
Acquire a semaphore.
@@ -732,6 +712,18 @@ Semaphores also support the :ref:`context manager protocol <with-locks>`.
than zero again, wake up that thread.
+.. class:: BoundedSemaphore(value=1)
+
+ Class implementing bounded semaphore objects. A bounded semaphore checks to
+ make sure its current value doesn't exceed its initial value. If it does,
+ :exc:`ValueError` is raised. In most situations semaphores are used to guard
+ resources with limited capacity. If the semaphore is released too many times
+ it's a sign of a bug. If not given, *value* defaults to 1.
+
+ .. versionchanged:: 3.3
+ Changed from a factory function to a class.
+
+
.. _semaphore-examples:
:class:`Semaphore` Example
@@ -743,7 +735,7 @@ you should use a bounded semaphore. Before spawning any worker threads, your
main thread would initialize the semaphore::
maxconnections = 5
- ...
+ # ...
pool_sema = BoundedSemaphore(value=maxconnections)
Once spawned, worker threads call the semaphore's acquire and release methods
@@ -752,7 +744,7 @@ when they need to connect to the server::
with pool_sema:
conn = connectdb()
try:
- ... use connection ...
+ # ... use connection ...
finally:
conn.close()
@@ -775,7 +767,13 @@ method. The :meth:`~Event.wait` method blocks until the flag is true.
.. class:: Event()
- The internal flag is initially false.
+ Class implementing event objects. An event manages a flag that can be set to
+ true with the :meth:`~Event.set` method and reset to false with the
+ :meth:`clear` method. The :meth:`wait` method blocks until the flag is true.
+ The flag is initially false.
+
+ .. versionchanged:: 3.3
+ Changed from a factory function to a class.
.. method:: is_set()
@@ -840,6 +838,9 @@ For example::
Create a timer that will run *function* with arguments *args* and keyword
arguments *kwargs*, after *interval* seconds have passed.
+ .. versionchanged:: 3.3
+ Changed from a factory function to a class.
+
.. method:: cancel()
Stop the timer, and cancel the execution of the timer's action. This will
@@ -973,27 +974,3 @@ is equivalent to::
Currently, :class:`Lock`, :class:`RLock`, :class:`Condition`,
:class:`Semaphore`, and :class:`BoundedSemaphore` objects may be used as
:keyword:`with` statement context managers.
-
-
-.. _threaded-imports:
-
-Importing in threaded code
---------------------------
-
-While the import machinery is thread-safe, there are two key restrictions on
-threaded imports due to inherent limitations in the way that thread-safety is
-provided:
-
-* Firstly, other than in the main module, an import should not have the
- side effect of spawning a new thread and then waiting for that thread in
- any way. Failing to abide by this restriction can lead to a deadlock if
- the spawned thread directly or indirectly attempts to import a module.
-* Secondly, all import attempts must be completed before the interpreter
- starts shutting itself down. This can be most easily achieved by only
- performing imports from non-daemon threads created through the threading
- module. Daemon threads and threads created directly with the thread
- module will require some other form of synchronization to ensure they do
- not attempt imports after system shutdown has commenced. Failure to
- abide by this restriction will lead to intermittent exceptions and
- crashes during interpreter shutdown (as the late imports attempt to
- access machinery which is no longer in a valid state).
diff --git a/Doc/library/time.rst b/Doc/library/time.rst
index e0c7007..bad3f47 100644
--- a/Doc/library/time.rst
+++ b/Doc/library/time.rst
@@ -41,25 +41,6 @@ An explanation of some terminology and conventions is in order.
parsed, they are converted according to the POSIX and ISO C standards: values
69--99 are mapped to 1969--1999, and values 0--68 are mapped to 2000--2068.
- For backward compatibility, years with less than 4 digits are treated
- specially by :func:`asctime`, :func:`mktime`, and :func:`strftime` functions
- that operate on a 9-tuple or :class:`struct_time` values. If year (the first
- value in the 9-tuple) is specified with less than 4 digits, its interpretation
- depends on the value of ``accept2dyear`` variable.
-
- If ``accept2dyear`` is true (default), a backward compatibility behavior is
- invoked as follows:
-
- - for 2-digit year, century is guessed according to POSIX rules for
- ``%y`` strptime format. A deprecation warning is issued when century
- information is guessed in this way.
-
- - for 3-digit or negative year, a :exc:`ValueError` exception is raised.
-
- If ``accept2dyear`` is false (set by the program or as a result of a
- non-empty value assigned to ``PYTHONY2K`` environment variable) all year
- values are interpreted as given.
-
.. index::
single: UTC
single: Coordinated Universal Time
@@ -96,6 +77,11 @@ An explanation of some terminology and conventions is in order.
See :class:`struct_time` for a description of these objects.
+ .. versionchanged:: 3.3
+ The :class:`struct_time` type was extended to provide the :attr:`tm_gmtoff`
+ and :attr:`tm_zone` attributes when the platform supports the corresponding
+ ``struct tm`` members.
+
* Use the following functions to convert between time representations:
+-------------------------+-------------------------+-------------------------+
@@ -117,24 +103,6 @@ An explanation of some terminology and conventions is in order.
The module defines the following functions and data items:
-
-.. data:: accept2dyear
-
- Boolean value indicating whether two-digit year values will be
- mapped to 1969--2068 range by :func:`asctime`, :func:`mktime`, and
- :func:`strftime` functions. This is true by default, but will be
- set to false if the environment variable :envvar:`PYTHONY2K` has
- been set to a non-empty string. It may also be modified at run
- time.
-
- .. deprecated:: 3.2
- Mapping of 2-digit year values by :func:`asctime`,
- :func:`mktime`, and :func:`strftime` functions to 1969--2068
- range is deprecated. Programs that need to process 2-digit
- years should use ``%y`` code available in :func:`strptime`
- function or convert 2-digit year values to 4-digit themselves.
-
-
.. data:: altzone
The offset of the local DST timezone, in seconds west of UTC, if one is defined.
@@ -152,7 +120,8 @@ The module defines the following functions and data items:
.. note::
- Unlike the C function of the same name, there is no trailing newline.
+ Unlike the C function of the same name, :func:`asctime` does not add a
+ trailing newline.
.. function:: clock()
@@ -172,6 +141,97 @@ The module defines the following functions and data items:
:c:func:`QueryPerformanceCounter`. The resolution is typically better than one
microsecond.
+ .. deprecated:: 3.3
+ The behaviour of this function depends on the platform: use
+ :func:`perf_counter` or :func:`process_time` instead, depending on your
+ requirements, to have a well defined behaviour.
+
+
+.. function:: clock_getres(clk_id)
+
+ Return the resolution (precision) of the specified clock *clk_id*.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: clock_gettime(clk_id)
+
+ Return the time of the specified clock *clk_id*.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. function:: clock_settime(clk_id, time)
+
+ Set the time of the specified clock *clk_id*.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. data:: CLOCK_HIGHRES
+
+ The Solaris OS has a CLOCK_HIGHRES timer that attempts to use an optimal
+ hardware source, and may give close to nanosecond resolution. CLOCK_HIGHRES
+ is the nonadjustable, high-resolution clock.
+
+ Availability: Solaris.
+
+ .. versionadded:: 3.3
+
+
+.. data:: CLOCK_MONOTONIC
+
+ Clock that cannot be set and represents monotonic time since some unspecified
+ starting point.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. data:: CLOCK_MONOTONIC_RAW
+
+ Similar to :data:`CLOCK_MONOTONIC`, but provides access to a raw
+ hardware-based time that is not subject to NTP adjustments.
+
+ Availability: Linux 2.6.28 or later.
+
+ .. versionadded:: 3.3
+
+
+.. data:: CLOCK_PROCESS_CPUTIME_ID
+
+ High-resolution per-process timer from the CPU.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. data:: CLOCK_REALTIME
+
+ System-wide real-time clock. Setting this clock requires appropriate
+ privileges.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
+
+.. data:: CLOCK_THREAD_CPUTIME_ID
+
+ Thread-specific CPU-time clock.
+
+ Availability: Unix.
+
+ .. versionadded:: 3.3
+
.. function:: ctime([secs])
@@ -186,6 +246,31 @@ The module defines the following functions and data items:
Nonzero if a DST timezone is defined.
+.. function:: get_clock_info(name)
+
+ Get information on the specified clock as a namespace object.
+ Supported clock names and the corresponding functions to read their value
+ are:
+
+ * ``'clock'``: :func:`time.clock`
+ * ``'monotonic'``: :func:`time.monotonic`
+ * ``'perf_counter'``: :func:`time.perf_counter`
+ * ``'process_time'``: :func:`time.process_time`
+ * ``'time'``: :func:`time.time`
+
+ The result has the following attributes:
+
+ - *adjustable*: ``True`` if the clock can be changed automatically (e.g. by
+ an NTP daemon) or manually by the system administrator, ``False`` otherwise
+ - *implementation*: The name of the underlying C function used to get
+ the clock value
+ - *monotonic*: ``True`` if the clock cannot go backward,
+ ``False`` otherwise
+ - *resolution*: The resolution of the clock in seconds (:class:`float`)
+
+ .. versionadded:: 3.3
+
+
.. function:: gmtime([secs])
Convert a time expressed in seconds since the epoch to a :class:`struct_time` in
@@ -215,6 +300,47 @@ The module defines the following functions and data items:
The earliest date for which it can generate a time is platform-dependent.
+.. function:: monotonic()
+
+ Return the value (in fractional seconds) of a monotonic clock, i.e. a clock
+ that cannot go backwards. The clock is not affected by system clock updates.
+ The reference point of the returned value is undefined, so that only the
+ difference between the results of consecutive calls is valid.
+
+ On Windows versions older than Vista, :func:`monotonic` detects
+ :c:func:`GetTickCount` integer overflow (32 bits, roll-over after 49.7 days).
+ It increases an internal epoch (reference time) by 2\ :sup:`32` each time
+ that an overflow is detected. The epoch is stored in the process-local state
+ and so the value of :func:`monotonic` may be different in two Python
+ processes running for more than 49 days. On more recent versions of Windows
+ and on other operating systems, :func:`monotonic` is system-wide.
+
+ Availability: Windows, Mac OS X, Linux, FreeBSD, OpenBSD, Solaris.
+
+ .. versionadded:: 3.3
+
+
+.. function:: perf_counter()
+
+ Return the value (in fractional seconds) of a performance counter, i.e. a
+ clock with the highest available resolution to measure a short duration. It
+ does include time elapsed during sleep and is system-wide. The reference
+ point of the returned value is undefined, so that only the difference between
+ the results of consecutive calls is valid.
+
+ .. versionadded:: 3.3
+
+
+.. function:: process_time()
+
+ Return the value (in fractional seconds) of the sum of the system and user
+ CPU time of the current process. It does not include time elapsed during
+ sleep. It is process-wide by definition. The reference point of the
+ returned value is undefined, so that only the difference between the results
+ of consecutive calls is valid.
+
+ .. versionadded:: 3.3
+
.. function:: sleep(secs)
Suspend execution for the given number of seconds. The argument may be a
@@ -308,9 +434,15 @@ The module defines the following functions and data items:
| ``%y`` | Year without century as a decimal number | |
| | [00,99]. | |
+-----------+------------------------------------------------+-------+
- | ``%Y`` | Year with century as a decimal number. | \(4) |
+ | ``%Y`` | Year with century as a decimal number. | |
| | | |
+-----------+------------------------------------------------+-------+
+ | ``%z`` | Time zone offset indicating a positive or | |
+ | | negative time difference from UTC/GMT of the | |
+ | | form +HHMM or -HHMM, where H represents decimal| |
+ | | hour digits and M represents decimal minute | |
+ | | digits [-23:59, +23:59]. | |
+ +-----------+------------------------------------------------+-------+
| ``%Z`` | Time zone name (no characters if no time zone | |
| | exists). | |
+-----------+------------------------------------------------+-------+
@@ -332,12 +464,6 @@ The module defines the following functions and data items:
When used with the :func:`strptime` function, ``%U`` and ``%W`` are only used in
calculations when the day of the week and the year are specified.
- (4)
- Produces different results depending on the value of
- ``time.accept2dyear`` variable. See :ref:`Year 2000 (Y2K)
- issues <time-y2kissues>` for details.
-
-
Here is an example, a format for dates compatible with that specified in the
:rfc:`2822` Internet email standard. [#]_ ::
@@ -416,10 +542,13 @@ The module defines the following functions and data items:
+-------+-------------------+---------------------------------+
| 8 | :attr:`tm_isdst` | 0, 1 or -1; see below |
+-------+-------------------+---------------------------------+
+ | N/A | :attr:`tm_zone` | abbreviation of timezone name |
+ +-------+-------------------+---------------------------------+
+ | N/A | :attr:`tm_gmtoff` | offset east of UTC in seconds |
+ +-------+-------------------+---------------------------------+
Note that unlike the C structure, the month value is a range of [1, 12], not
- [0, 11]. A year value will be handled as described under :ref:`Year 2000
- (Y2K) issues <time-y2kissues>` above. A ``-1`` argument as the daylight
+ [0, 11]. A ``-1`` argument as the daylight
savings flag, passed to :func:`mktime` will usually result in the correct
daylight savings state to be filled in.
@@ -427,6 +556,9 @@ The module defines the following functions and data items:
:class:`struct_time`, or having elements of the wrong type, a
:exc:`TypeError` is raised.
+ .. versionchanged:: 3.3
+ :attr:`tm_gmtoff` and :attr:`tm_zone` attributes are available on platforms
+ with C library supporting the corresponding fields in ``struct tm``.
.. function:: time()
@@ -437,7 +569,6 @@ The module defines the following functions and data items:
lower value than a previous call if the system clock has been set back between
the two calls.
-
.. data:: timezone
The offset of the local (non-DST) timezone, in seconds west of UTC (negative in
diff --git a/Doc/library/timeit.rst b/Doc/library/timeit.rst
index a3ec66f..a487917 100644
--- a/Doc/library/timeit.rst
+++ b/Doc/library/timeit.rst
@@ -73,13 +73,10 @@ The module defines three convenience functions and a public class:
.. function:: default_timer()
- Define a default timer, in a platform-specific manner. On Windows,
- :func:`time.clock` has microsecond granularity, but :func:`time.time`'s
- granularity is 1/60th of a second. On Unix, :func:`time.clock` has 1/100th of
- a second granularity, and :func:`time.time` is much more precise. On either
- platform, :func:`default_timer` measures wall clock time, not the CPU
- time. This means that other processes running on the same computer may
- interfere with the timing.
+ The default timer, which is always :func:`time.perf_counter`.
+
+ .. versionchanged:: 3.3
+ :func:`time.perf_counter` is now the default timer.
.. class:: Timer(stmt='pass', setup='pass', timer=<timer function>)
@@ -187,13 +184,20 @@ Where the following options are understood:
statement to be executed once initially (default ``pass``)
+.. cmdoption:: -p, --process
+
+ measure process time, not wallclock time, using :func:`time.process_time`
+ instead of :func:`time.perf_counter`, which is the default
+
+ .. versionadded:: 3.3
+
.. cmdoption:: -t, --time
- use :func:`time.time` (default on all platforms but Windows)
+ use :func:`time.time` (deprecated)
.. cmdoption:: -c, --clock
- use :func:`time.clock` (default on Windows)
+ use :func:`time.clock` (deprecated)
.. cmdoption:: -v, --verbose
@@ -211,12 +215,11 @@ similarly.
If :option:`-n` is not given, a suitable number of loops is calculated by trying
successive powers of 10 until the total time is at least 0.2 seconds.
-:func:`default_timer` measurations can be affected by other programs running on
-the same machine, so
-the best thing to do when accurate timing is necessary is to repeat
-the timing a few times and use the best time. The :option:`-r` option is good
-for this; the default of 3 repetitions is probably enough in most cases. On
-Unix, you can use :func:`time.clock` to measure CPU time.
+:func:`default_timer` measurements can be affected by other programs running on
+the same machine, so the best thing to do when accurate timing is necessary is
+to repeat the timing a few times and use the best time. The :option:`-r`
+option is good for this; the default of 3 repetitions is probably enough in
+most cases. You can use :func:`time.process_time` to measure CPU time.
.. note::
diff --git a/Doc/library/tkinter.rst b/Doc/library/tkinter.rst
index f6e095a..83a5375 100644
--- a/Doc/library/tkinter.rst
+++ b/Doc/library/tkinter.rst
@@ -194,35 +194,30 @@ A Simple Hello World Program
::
- from tkinter import *
-
- class Application(Frame):
- def say_hi(self):
- print("hi there, everyone!")
-
- def createWidgets(self):
- self.QUIT = Button(self)
- self.QUIT["text"] = "QUIT"
- self.QUIT["fg"] = "red"
- self.QUIT["command"] = self.quit
+ import tkinter as tk
- self.QUIT.pack({"side": "left"})
+ class Application(tk.Frame):
+ def __init__(self, master=None):
+ tk.Frame.__init__(self, master)
+ self.pack()
+ self.createWidgets()
- self.hi_there = Button(self)
- self.hi_there["text"] = "Hello",
- self.hi_there["command"] = self.say_hi
+ def createWidgets(self):
+ self.hi_there = tk.Button(self)
+ self.hi_there["text"] = "Hello World\n(click me)"
+ self.hi_there["command"] = self.say_hi
+ self.hi_there.pack(side="top")
- self.hi_there.pack({"side": "left"})
+ self.QUIT = tk.Button(self, text="QUIT", fg="red",
+ command=root.destroy)
+ self.QUIT.pack(side="bottom")
- def __init__(self, master=None):
- Frame.__init__(self, master)
- self.pack()
- self.createWidgets()
+ def say_hi(self):
+ print("hi there, everyone!")
- root = Tk()
- app = Application(master=root)
- app.mainloop()
- root.destroy()
+ root = tk.Tk()
+ app = Application(master=root)
+ app.mainloop()
A (Very) Quick Look at Tcl/Tk
@@ -755,22 +750,32 @@ Entry widget indexes (index, view index, etc.)
displayed. You can use these :mod:`tkinter` functions to access these special
points in text widgets:
- AtEnd()
+.. function:: AtEnd()
refers to the last position in the text
- AtInsert()
+ .. deprecated:: 3.3
+
+.. function:: AtInsert()
refers to the point where the text cursor is
- AtSelFirst()
+ .. deprecated:: 3.3
+
+.. function:: AtSelFirst()
indicates the beginning point of the selected text
- AtSelLast()
+ .. deprecated:: 3.3
+
+.. function:: AtSelLast()
denotes the last point of the selected text and finally
- At(x[, y])
+ .. deprecated:: 3.3
+
+.. function:: At(x[, y])
refers to the character at pixel location *x*, *y* (with *y* not used in the
case of a text entry widget, which contains a single line of text).
+ .. deprecated:: 3.3
+
Text widget indexes
The index notation for Text widgets is very rich and is best described in the Tk
man pages.
@@ -818,4 +823,3 @@ some widget (e.g. labels, buttons, menus). In these cases, Tk will not keep a
reference to the image. When the last Python reference to the image object is
deleted, the image data is deleted as well, and Tk will display an empty box
wherever the image was used.
-
diff --git a/Doc/library/tokenize.rst b/Doc/library/tokenize.rst
index 70919ca..37d9f41 100644
--- a/Doc/library/tokenize.rst
+++ b/Doc/library/tokenize.rst
@@ -17,9 +17,11 @@ colorizers for on-screen displays.
To simplify token stream handling, all :ref:`operators` and :ref:`delimiters`
tokens are returned using the generic :data:`token.OP` token type. The exact
-type can be determined by checking the token ``string`` field on the
-:term:`named tuple` returned from :func:`tokenize.tokenize` for the character
-sequence that identifies a specific operator token.
+type can be determined by checking the ``exact_type`` property on the
+:term:`named tuple` returned from :func:`tokenize.tokenize`.
+
+Tokenizing Input
+----------------
The primary entry point is a :term:`generator`:
@@ -39,9 +41,17 @@ The primary entry point is a :term:`generator`:
returned as a :term:`named tuple` with the field names:
``type string start end line``.
+ The returned :term:`named tuple` has an additional property named
+ ``exact_type`` that contains the exact operator type for
+ :data:`token.OP` tokens. For all other token types ``exact_type``
+ equals the named tuple ``type`` field.
+
.. versionchanged:: 3.1
Added support for named tuples.
+ .. versionchanged:: 3.3
+ Added support for ``exact_type``.
+
:func:`tokenize` determines the source encoding of the file by looking for a
UTF-8 BOM or encoding cookie, according to :pep:`263`.
@@ -122,6 +132,38 @@ function it uses to do this is available:
.. versionadded:: 3.2
+.. _tokenize-cli:
+
+Command-Line Usage
+------------------
+
+.. versionadded:: 3.3
+
+The :mod:`tokenize` module can be executed as a script from the command line.
+It is as simple as:
+
+.. code-block:: sh
+
+ python -m tokenize [-e] [filename.py]
+
+The following options are accepted:
+
+.. program:: tokenize
+
+.. cmdoption:: -h, --help
+
+ show this help message and exit
+
+.. cmdoption:: -e, --exact
+
+ display token names using the exact type
+
+If :file:`filename.py` is specified its contents are tokenized to stdout.
+Otherwise, tokenization is performed on stdin.
+
+Examples
+------------------
+
Example of a script rewriter that transforms float literals into Decimal
objects::
@@ -164,3 +206,63 @@ objects::
result.append((toknum, tokval))
return untokenize(result).decode('utf-8')
+Example of tokenizing from the command line. The script::
+
+ def say_hello():
+ print("Hello, World!")
+
+ say_hello()
+
+will be tokenized to the following output where the first column is the range
+of the line/column coordinates where the token is found, the second column is
+the name of the token, and the final column is the value of the token (if any):
+
+.. code-block:: sh
+
+ $ python -m tokenize hello.py
+ 0,0-0,0: ENCODING 'utf-8'
+ 1,0-1,3: NAME 'def'
+ 1,4-1,13: NAME 'say_hello'
+ 1,13-1,14: OP '('
+ 1,14-1,15: OP ')'
+ 1,15-1,16: OP ':'
+ 1,16-1,17: NEWLINE '\n'
+ 2,0-2,4: INDENT ' '
+ 2,4-2,9: NAME 'print'
+ 2,9-2,10: OP '('
+ 2,10-2,25: STRING '"Hello, World!"'
+ 2,25-2,26: OP ')'
+ 2,26-2,27: NEWLINE '\n'
+ 3,0-3,1: NL '\n'
+ 4,0-4,0: DEDENT ''
+ 4,0-4,9: NAME 'say_hello'
+ 4,9-4,10: OP '('
+ 4,10-4,11: OP ')'
+ 4,11-4,12: NEWLINE '\n'
+ 5,0-5,0: ENDMARKER ''
+
+The exact token type names can be displayed using the ``-e`` option:
+
+.. code-block:: sh
+
+ $ python -m tokenize -e hello.py
+ 0,0-0,0: ENCODING 'utf-8'
+ 1,0-1,3: NAME 'def'
+ 1,4-1,13: NAME 'say_hello'
+ 1,13-1,14: LPAR '('
+ 1,14-1,15: RPAR ')'
+ 1,15-1,16: COLON ':'
+ 1,16-1,17: NEWLINE '\n'
+ 2,0-2,4: INDENT ' '
+ 2,4-2,9: NAME 'print'
+ 2,9-2,10: LPAR '('
+ 2,10-2,25: STRING '"Hello, World!"'
+ 2,25-2,26: RPAR ')'
+ 2,26-2,27: NEWLINE '\n'
+ 3,0-3,1: NL '\n'
+ 4,0-4,0: DEDENT ''
+ 4,0-4,9: NAME 'say_hello'
+ 4,9-4,10: LPAR '('
+ 4,10-4,11: RPAR ')'
+ 4,11-4,12: NEWLINE '\n'
+ 5,0-5,0: ENDMARKER ''
diff --git a/Doc/library/types.rst b/Doc/library/types.rst
index d4a76b6..695480f 100644
--- a/Doc/library/types.rst
+++ b/Doc/library/types.rst
@@ -1,5 +1,5 @@
-:mod:`types` --- Names for built-in types
-=========================================
+:mod:`types` --- Dynamic type creation and names for built-in types
+===================================================================
.. module:: types
:synopsis: Names for built-in types.
@@ -8,20 +8,77 @@
--------------
-This module defines names for some object types that are used by the standard
+This module defines utility functions to assist in dynamic creation of
+new types.
+
+It also defines names for some object types that are used by the standard
Python interpreter, but not exposed as builtins like :class:`int` or
-:class:`str` are. Also, it does not include some of the types that arise
-transparently during processing such as the ``listiterator`` type.
+:class:`str` are.
+
+
+Dynamic Type Creation
+---------------------
+
+.. function:: new_class(name, bases=(), kwds=None, exec_body=None)
+
+ Creates a class object dynamically using the appropriate metaclass.
+
+ The first three arguments are the components that make up a class
+ definition header: the class name, the base classes (in order), the
+ keyword arguments (such as ``metaclass``).
+
+ The *exec_body* argument is a callback that is used to populate the
+ freshly created class namespace. It should accept the class namespace
+ as its sole argument and update the namespace directly with the class
+ contents. If no callback is provided, it has the same effect as passing
+ in ``lambda ns: ns``.
+
+ .. versionadded:: 3.3
+
+.. function:: prepare_class(name, bases=(), kwds=None)
+
+ Calculates the appropriate metaclass and creates the class namespace.
+
+ The arguments are the components that make up a class definition header:
+ the class name, the base classes (in order) and the keyword arguments
+ (such as ``metaclass``).
+
+ The return value is a 3-tuple: ``metaclass, namespace, kwds``
+
+ *metaclass* is the appropriate metaclass, *namespace* is the
+ prepared class namespace and *kwds* is an updated copy of the passed
+ in *kwds* argument with any ``'metaclass'`` entry removed. If no *kwds*
+ argument is passed in, this will be an empty dict.
-Typical use is for :func:`isinstance` or :func:`issubclass` checks.
+ .. versionadded:: 3.3
-The module defines the following names:
+.. seealso::
+
+ :ref:`metaclasses`
+ Full details of the class creation process supported by these functions
+
+ :pep:`3115` - Metaclasses in Python 3000
+ Introduced the ``__prepare__`` namespace hook
+
+
+Standard Interpreter Types
+--------------------------
+
+This module provides names for many of the types that are required to
+implement a Python interpreter. It deliberately avoids including some of
+the types that arise only incidentally during processing such as the
+``listiterator`` type.
+
+Typical use of these names is for :func:`isinstance` or
+:func:`issubclass` checks.
+
+Standard names are defined for the following types:
.. data:: FunctionType
LambdaType
- The type of user-defined functions and functions created by :keyword:`lambda`
- expressions.
+ The type of user-defined functions and functions created by
+ :keyword:`lambda` expressions.
.. data:: GeneratorType
@@ -85,3 +142,79 @@ The module defines the following names:
In other implementations of Python, this type may be identical to
``GetSetDescriptorType``.
+
+.. class:: MappingProxyType(mapping)
+
+ Read-only proxy of a mapping. It provides a dynamic view on the mapping's
+ entries, which means that when the mapping changes, the view reflects these
+ changes.
+
+ .. versionadded:: 3.3
+
+ .. describe:: key in proxy
+
+ Return ``True`` if the underlying mapping has a key *key*, else
+ ``False``.
+
+ .. describe:: proxy[key]
+
+ Return the item of the underlying mapping with key *key*. Raises a
+ :exc:`KeyError` if *key* is not in the underlying mapping.
+
+ .. describe:: iter(proxy)
+
+ Return an iterator over the keys of the underlying mapping. This is a
+ shortcut for ``iter(proxy.keys())``.
+
+ .. describe:: len(proxy)
+
+ Return the number of items in the underlying mapping.
+
+ .. method:: copy()
+
+ Return a shallow copy of the underlying mapping.
+
+ .. method:: get(key[, default])
+
+ Return the value for *key* if *key* is in the underlying mapping, else
+ *default*. If *default* is not given, it defaults to ``None``, so that
+ this method never raises a :exc:`KeyError`.
+
+ .. method:: items()
+
+ Return a new view of the underlying mapping's items (``(key, value)``
+ pairs).
+
+ .. method:: keys()
+
+ Return a new view of the underlying mapping's keys.
+
+ .. method:: values()
+
+ Return a new view of the underlying mapping's values.
+
+
+.. class:: SimpleNamespace
+
+ A simple :class:`object` subclass that provides attribute access to its
+ namespace, as well as a meaningful repr.
+
+ Unlike :class:`object`, with ``SimpleNamespace`` you can add and remove
+ attributes. If a ``SimpleNamespace`` object is initialized with keyword
+ arguments, those are directly added to the underlying namespace.
+
+ The type is roughly equivalent to the following code::
+
+ class SimpleNamespace:
+ def __init__(self, **kwargs):
+ self.__dict__.update(kwargs)
+ def __repr__(self):
+ keys = sorted(self.__dict__)
+ items = ("{}={!r}".format(k, self.__dict__[k]) for k in keys)
+ return "{}({})".format(type(self).__name__, ", ".join(items))
+
+ ``SimpleNamespace`` may be useful as a replacement for ``class NS: pass``.
+ However, for a structured record type use :func:`~collections.namedtuple`
+ instead.
+
+ .. versionadded:: 3.3
diff --git a/Doc/library/unicodedata.rst b/Doc/library/unicodedata.rst
index 3b1bbe0..3787c36 100644
--- a/Doc/library/unicodedata.rst
+++ b/Doc/library/unicodedata.rst
@@ -15,8 +15,8 @@
This module provides access to the Unicode Character Database (UCD) which
defines character properties for all Unicode characters. The data contained in
-this database is compiled from the `UCD version 6.0.0
-<http://www.unicode.org/Public/6.0.0/ucd>`_.
+this database is compiled from the `UCD version 6.1.0
+<http://www.unicode.org/Public/6.1.0/ucd>`_.
The module uses the same names and symbols as defined by Unicode
Standard Annex #44, `"Unicode Character Database"
@@ -29,6 +29,9 @@ following functions:
Look up character by name. If a character with the given name is found, return
the corresponding character. If not found, :exc:`KeyError` is raised.
+ .. versionchanged:: 3.3
+ Support for name aliases [#]_ and named sequences [#]_ has been added.
+
.. function:: name(chr[, default])
@@ -160,3 +163,9 @@ Examples:
>>> unicodedata.bidirectional('\u0660') # 'A'rabic, 'N'umber
'AN'
+
+.. rubric:: Footnotes
+
+.. [#] http://www.unicode.org/Public/6.1.0/ucd/NameAliases.txt
+
+.. [#] http://www.unicode.org/Public/6.1.0/ucd/NamedSequences.txt
diff --git a/Doc/library/unittest.mock-examples.rst b/Doc/library/unittest.mock-examples.rst
new file mode 100644
index 0000000..8e1e88a
--- /dev/null
+++ b/Doc/library/unittest.mock-examples.rst
@@ -0,0 +1,1246 @@
+:mod:`unittest.mock` --- getting started
+========================================
+
+.. moduleauthor:: Michael Foord <michael@python.org>
+.. currentmodule:: unittest.mock
+
+.. versionadded:: 3.3
+
+
+.. _getting-started:
+
+Using Mock
+----------
+
+Mock Patching Methods
+~~~~~~~~~~~~~~~~~~~~~
+
+Common uses for :class:`Mock` objects include:
+
+* Patching methods
+* Recording method calls on objects
+
+You might want to replace a method on an object to check that
+it is called with the correct arguments by another part of the system:
+
+ >>> real = SomeClass()
+ >>> real.method = MagicMock(name='method')
+ >>> real.method(3, 4, 5, key='value')
+ <MagicMock name='method()' id='...'>
+
+Once our mock has been used (`real.method` in this example) it has methods
+and attributes that allow you to make assertions about how it has been used.
+
+.. note::
+
+ In most of these examples the :class:`Mock` and :class:`MagicMock` classes
+ are interchangeable. As the `MagicMock` is the more capable class it makes
+ a sensible one to use by default.
+
+Once the mock has been called its :attr:`~Mock.called` attribute is set to
+`True`. More importantly we can use the :meth:`~Mock.assert_called_with` or
+:meth:`~Mock.assert_called_once_with` method to check that it was called with
+the correct arguments.
+
+This example tests that calling `ProductionClass().method` results in a call to
+the `something` method:
+
+ >>> class ProductionClass(object):
+ ... def method(self):
+ ... self.something(1, 2, 3)
+ ... def something(self, a, b, c):
+ ... pass
+ ...
+ >>> real = ProductionClass()
+ >>> real.something = MagicMock()
+ >>> real.method()
+ >>> real.something.assert_called_once_with(1, 2, 3)
+
+
+
+Mock for Method Calls on an Object
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In the last example we patched a method directly on an object to check that it
+was called correctly. Another common use case is to pass an object into a
+method (or some part of the system under test) and then check that it is used
+in the correct way.
+
+The simple `ProductionClass` below has a `closer` method. If it is called with
+an object then it calls `close` on it.
+
+ >>> class ProductionClass(object):
+ ... def closer(self, something):
+ ... something.close()
+ ...
+
+So to test it we need to pass in an object with a `close` method and check
+that it was called correctly.
+
+ >>> real = ProductionClass()
+ >>> mock = Mock()
+ >>> real.closer(mock)
+ >>> mock.close.assert_called_with()
+
+We don't have to do any work to provide the 'close' method on our mock.
+Accessing close creates it. So, if 'close' hasn't already been called then
+accessing it in the test will create it, but :meth:`~Mock.assert_called_with`
+will raise a failure exception.
+
+
+Mocking Classes
+~~~~~~~~~~~~~~~
+
+A common use case is to mock out classes instantiated by your code under test.
+When you patch a class, then that class is replaced with a mock. Instances
+are created by *calling the class*. This means you access the "mock instance"
+by looking at the return value of the mocked class.
+
+In the example below we have a function `some_function` that instantiates `Foo`
+and calls a method on it. The call to `patch` replaces the class `Foo` with a
+mock. The `Foo` instance is the result of calling the mock, so it is configured
+by modifying the mock :attr:`~Mock.return_value`.
+
+ >>> def some_function():
+ ... instance = module.Foo()
+ ... return instance.method()
+ ...
+ >>> with patch('module.Foo') as mock:
+ ... instance = mock.return_value
+ ... instance.method.return_value = 'the result'
+ ... result = some_function()
+ ... assert result == 'the result'
+
+
+Naming your mocks
+~~~~~~~~~~~~~~~~~
+
+It can be useful to give your mocks a name. The name is shown in the repr of
+the mock and can be helpful when the mock appears in test failure messages. The
+name is also propagated to attributes or methods of the mock:
+
+ >>> mock = MagicMock(name='foo')
+ >>> mock
+ <MagicMock name='foo' id='...'>
+ >>> mock.method
+ <MagicMock name='foo.method' id='...'>
+
+
+Tracking all Calls
+~~~~~~~~~~~~~~~~~~
+
+Often you want to track more than a single call to a method. The
+:attr:`~Mock.mock_calls` attribute records all calls
+to child attributes of the mock - and also to their children.
+
+ >>> mock = MagicMock()
+ >>> mock.method()
+ <MagicMock name='mock.method()' id='...'>
+ >>> mock.attribute.method(10, x=53)
+ <MagicMock name='mock.attribute.method()' id='...'>
+ >>> mock.mock_calls
+ [call.method(), call.attribute.method(10, x=53)]
+
+If you make an assertion about `mock_calls` and any unexpected methods
+have been called, then the assertion will fail. This is useful because as well
+as asserting that the calls you expected have been made, you are also checking
+that they were made in the right order and with no additional calls.
+
+You use the :data:`call` object to construct lists for comparing with
+`mock_calls`:
+
+ >>> expected = [call.method(), call.attribute.method(10, x=53)]
+ >>> mock.mock_calls == expected
+ True
+
+
+Setting Return Values and Attributes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Setting the return values on a mock object is trivially easy:
+
+ >>> mock = Mock()
+ >>> mock.return_value = 3
+ >>> mock()
+ 3
+
+Of course you can do the same for methods on the mock:
+
+ >>> mock = Mock()
+ >>> mock.method.return_value = 3
+ >>> mock.method()
+ 3
+
+The return value can also be set in the constructor:
+
+ >>> mock = Mock(return_value=3)
+ >>> mock()
+ 3
+
+If you need an attribute setting on your mock, just do it:
+
+ >>> mock = Mock()
+ >>> mock.x = 3
+ >>> mock.x
+ 3
+
+Sometimes you want to mock up a more complex situation, like for example
+`mock.connection.cursor().execute("SELECT 1")`. If we wanted this call to
+return a list, then we have to configure the result of the nested call.
+
+We can use :data:`call` to construct the set of calls in a "chained call" like
+this for easy assertion afterwards:
+
+ >>> mock = Mock()
+ >>> cursor = mock.connection.cursor.return_value
+ >>> cursor.execute.return_value = ['foo']
+ >>> mock.connection.cursor().execute("SELECT 1")
+ ['foo']
+ >>> expected = call.connection.cursor().execute("SELECT 1").call_list()
+ >>> mock.mock_calls
+ [call.connection.cursor(), call.connection.cursor().execute('SELECT 1')]
+ >>> mock.mock_calls == expected
+ True
+
+It is the call to `.call_list()` that turns our call object into a list of
+calls representing the chained calls.
+
+
+Raising exceptions with mocks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A useful attribute is :attr:`~Mock.side_effect`. If you set this to an
+exception class or instance then the exception will be raised when the mock
+is called.
+
+ >>> mock = Mock(side_effect=Exception('Boom!'))
+ >>> mock()
+ Traceback (most recent call last):
+ ...
+ Exception: Boom!
+
+
+Side effect functions and iterables
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+`side_effect` can also be set to a function or an iterable. The use case for
+`side_effect` as an iterable is where your mock is going to be called several
+times, and you want each call to return a different value. When you set
+`side_effect` to an iterable every call to the mock returns the next value
+from the iterable:
+
+ >>> mock = MagicMock(side_effect=[4, 5, 6])
+ >>> mock()
+ 4
+ >>> mock()
+ 5
+ >>> mock()
+ 6
+
+
+For more advanced use cases, like dynamically varying the return values
+depending on what the mock is called with, `side_effect` can be a function.
+The function will be called with the same arguments as the mock. Whatever the
+function returns is what the call returns:
+
+ >>> vals = {(1, 2): 1, (2, 3): 2}
+ >>> def side_effect(*args):
+ ... return vals[args]
+ ...
+ >>> mock = MagicMock(side_effect=side_effect)
+ >>> mock(1, 2)
+ 1
+ >>> mock(2, 3)
+ 2
+
+
+Creating a Mock from an Existing Object
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One problem with overuse of mocking is that it couples your tests to the
+implementation of your mocks rather than your real code. Suppose you have a
+class that implements `some_method`. In a test for another class, you
+provide a mock of this object that *also* provides `some_method`. If later
+you refactor the first class, so that it no longer has `some_method` - then
+your tests will continue to pass even though your code is now broken!
+
+`Mock` allows you to provide an object as a specification for the mock,
+using the `spec` keyword argument. Accessing methods / attributes on the
+mock that don't exist on your specification object will immediately raise an
+attribute error. If you change the implementation of your specification, then
+tests that use that class will start failing immediately without you having to
+instantiate the class in those tests.
+
+ >>> mock = Mock(spec=SomeClass)
+ >>> mock.old_method()
+ Traceback (most recent call last):
+ ...
+ AttributeError: object has no attribute 'old_method'
+
+If you want a stronger form of specification that prevents the setting
+of arbitrary attributes as well as the getting of them then you can use
+`spec_set` instead of `spec`.
+
+
+
+Patch Decorators
+----------------
+
+.. note::
+
+ With `patch` it matters that you patch objects in the namespace where they
+ are looked up. This is normally straightforward, but for a quick guide
+ read :ref:`where to patch <where-to-patch>`.
+
+
+A common need in tests is to patch a class attribute or a module attribute,
+for example patching a builtin or patching a class in a module to test that it
+is instantiated. Modules and classes are effectively global, so patching on
+them has to be undone after the test or the patch will persist into other
+tests and cause hard to diagnose problems.
+
+mock provides three convenient decorators for this: `patch`, `patch.object` and
+`patch.dict`. `patch` takes a single string, of the form
+`package.module.Class.attribute` to specify the attribute you are patching. It
+also optionally takes a value that you want the attribute (or class or
+whatever) to be replaced with. `patch.object` takes an object and the name of
+the attribute you would like patched, plus optionally the value to patch it
+with.
+
+`patch.object`:
+
+ >>> original = SomeClass.attribute
+ >>> @patch.object(SomeClass, 'attribute', sentinel.attribute)
+ ... def test():
+ ... assert SomeClass.attribute == sentinel.attribute
+ ...
+ >>> test()
+ >>> assert SomeClass.attribute == original
+
+ >>> @patch('package.module.attribute', sentinel.attribute)
+ ... def test():
+ ... from package.module import attribute
+ ... assert attribute is sentinel.attribute
+ ...
+ >>> test()
+
+If you are patching a module (including `builtins`) then use `patch`
+instead of `patch.object`:
+
+ >>> mock = MagicMock(return_value=sentinel.file_handle)
+ >>> with patch('builtins.open', mock):
+ ... handle = open('filename', 'r')
+ ...
+ >>> mock.assert_called_with('filename', 'r')
+ >>> assert handle == sentinel.file_handle, "incorrect file handle returned"
+
+The module name can be 'dotted', in the form `package.module` if needed:
+
+ >>> @patch('package.module.ClassName.attribute', sentinel.attribute)
+ ... def test():
+ ... from package.module import ClassName
+ ... assert ClassName.attribute == sentinel.attribute
+ ...
+ >>> test()
+
+A nice pattern is to actually decorate test methods themselves:
+
+ >>> class MyTest(unittest2.TestCase):
+ ... @patch.object(SomeClass, 'attribute', sentinel.attribute)
+ ... def test_something(self):
+ ... self.assertEqual(SomeClass.attribute, sentinel.attribute)
+ ...
+ >>> original = SomeClass.attribute
+ >>> MyTest('test_something').test_something()
+ >>> assert SomeClass.attribute == original
+
+If you want to patch with a Mock, you can use `patch` with only one argument
+(or `patch.object` with two arguments). The mock will be created for you and
+passed into the test function / method:
+
+ >>> class MyTest(unittest2.TestCase):
+ ... @patch.object(SomeClass, 'static_method')
+ ... def test_something(self, mock_method):
+ ... SomeClass.static_method()
+ ... mock_method.assert_called_with()
+ ...
+ >>> MyTest('test_something').test_something()
+
+You can stack up multiple patch decorators using this pattern:
+
+ >>> class MyTest(unittest2.TestCase):
+ ... @patch('package.module.ClassName1')
+ ... @patch('package.module.ClassName2')
+ ... def test_something(self, MockClass2, MockClass1):
+ ... self.assertTrue(package.module.ClassName1 is MockClass1)
+ ... self.assertTrue(package.module.ClassName2 is MockClass2)
+ ...
+ >>> MyTest('test_something').test_something()
+
+When you nest patch decorators the mocks are passed in to the decorated
+function in the same order they are applied (the normal *python* order that
+decorators are applied). This means from the bottom up, so in the example
+above the mock for `package.module.ClassName2` is passed in first.
+
+There is also :func:`patch.dict` for setting values in a dictionary just
+during a scope and restoring the dictionary to its original state when the test
+ends:
+
+ >>> foo = {'key': 'value'}
+ >>> original = foo.copy()
+ >>> with patch.dict(foo, {'newkey': 'newvalue'}, clear=True):
+ ... assert foo == {'newkey': 'newvalue'}
+ ...
+ >>> assert foo == original
+
+`patch`, `patch.object` and `patch.dict` can all be used as context managers.
+
+Where you use `patch` to create a mock for you, you can get a reference to the
+mock using the "as" form of the with statement:
+
+ >>> class ProductionClass(object):
+ ... def method(self):
+ ... pass
+ ...
+ >>> with patch.object(ProductionClass, 'method') as mock_method:
+ ... mock_method.return_value = None
+ ... real = ProductionClass()
+ ... real.method(1, 2, 3)
+ ...
+ >>> mock_method.assert_called_with(1, 2, 3)
+
+
+As an alternative `patch`, `patch.object` and `patch.dict` can be used as
+class decorators. When used in this way it is the same as applying the
+decorator individually to every method whose name starts with "test".
+
+
+.. _further-examples:
+
+Further Examples
+================
+
+
+Here are some more examples for some slightly more advanced scenarios.
+
+
+Mocking chained calls
+---------------------
+
+Mocking chained calls is actually straightforward with mock once you
+understand the :attr:`~Mock.return_value` attribute. When a mock is called for
+the first time, or you fetch its `return_value` before it has been called, a
+new `Mock` is created.
+
+This means that you can see how the object returned from a call to a mocked
+object has been used by interrogating the `return_value` mock:
+
+ >>> mock = Mock()
+ >>> mock().foo(a=2, b=3)
+ <Mock name='mock().foo()' id='...'>
+ >>> mock.return_value.foo.assert_called_with(a=2, b=3)
+
+From here it is a simple step to configure and then make assertions about
+chained calls. Of course another alternative is writing your code in a more
+testable way in the first place...
+
+So, suppose we have some code that looks a little bit like this:
+
+ >>> class Something(object):
+ ... def __init__(self):
+ ... self.backend = BackendProvider()
+ ... def method(self):
+ ... response = self.backend.get_endpoint('foobar').create_call('spam', 'eggs').start_call()
+ ... # more code
+
+Assuming that `BackendProvider` is already well tested, how do we test
+`method()`? Specifically, we want to test that the code section `# more
+code` uses the response object in the correct way.
+
+As this chain of calls is made from an instance attribute we can monkey patch
+the `backend` attribute on a `Something` instance. In this particular case
+we are only interested in the return value from the final call to
+`start_call` so we don't have much configuration to do. Let's assume the
+object it returns is 'file-like', so we'll ensure that our response object
+uses the builtin `file` as its `spec`.
+
+To do this we create a mock instance as our mock backend and create a mock
+response object for it. To set the response as the return value for that final
+`start_call` we could do this:
+
+ `mock_backend.get_endpoint.return_value.create_call.return_value.start_call.return_value = mock_response`.
+
+We can do that in a slightly nicer way using the :meth:`~Mock.configure_mock`
+method to directly set the return value for us:
+
+ >>> something = Something()
+ >>> mock_response = Mock(spec=file)
+ >>> mock_backend = Mock()
+ >>> config = {'get_endpoint.return_value.create_call.return_value.start_call.return_value': mock_response}
+ >>> mock_backend.configure_mock(**config)
+
+With these we monkey patch the "mock backend" in place and can make the real
+call:
+
+ >>> something.backend = mock_backend
+ >>> something.method()
+
+Using :attr:`~Mock.mock_calls` we can check the chained call with a single
+assert. A chained call is several calls in one line of code, so there will be
+several entries in `mock_calls`. We can use :meth:`call.call_list` to create
+this list of calls for us:
+
+ >>> chained = call.get_endpoint('foobar').create_call('spam', 'eggs').start_call()
+ >>> call_list = chained.call_list()
+ >>> assert mock_backend.mock_calls == call_list
+
+
+Partial mocking
+---------------
+
+In some tests I wanted to mock out a call to `datetime.date.today()
+<http://docs.python.org/library/datetime.html#datetime.date.today>`_ to return
+a known date, but I didn't want to prevent the code under test from
+creating new date objects. Unfortunately `datetime.date` is written in C, and
+so I couldn't just monkey-patch out the static `date.today` method.
+
+I found a simple way of doing this that involved effectively wrapping the date
+class with a mock, but passing through calls to the constructor to the real
+class (and returning real instances).
+
+The :func:`patch decorator <patch>` is used here to
+mock out the `date` class in the module under test. The :attr:`side_effect`
+attribute on the mock date class is then set to a lambda function that returns
+a real date. When the mock date class is called a real date will be
+constructed and returned by `side_effect`.
+
+ >>> from datetime import date
+ >>> with patch('mymodule.date') as mock_date:
+ ... mock_date.today.return_value = date(2010, 10, 8)
+ ... mock_date.side_effect = lambda *args, **kw: date(*args, **kw)
+ ...
+ ... assert mymodule.date.today() == date(2010, 10, 8)
+ ... assert mymodule.date(2009, 6, 8) == date(2009, 6, 8)
+ ...
+
+Note that we don't patch `datetime.date` globally, we patch `date` in the
+module that *uses* it. See :ref:`where to patch <where-to-patch>`.
+
+When `date.today()` is called a known date is returned, but calls to the
+`date(...)` constructor still return normal dates. Without this you can find
+yourself having to calculate an expected result using exactly the same
+algorithm as the code under test, which is a classic testing anti-pattern.
+
+Calls to the date constructor are recorded in the `mock_date` attributes
+(`call_count` and friends) which may also be useful for your tests.
+
+An alternative way of dealing with mocking dates, or other builtin classes,
+is discussed in `this blog entry
+<http://williamjohnbert.com/2011/07/how-to-unit-testing-in-django-with-mocking-and-patching/>`_.
+
+
+Mocking a Generator Method
+--------------------------
+
+A Python generator is a function or method that uses the `yield statement
+<http://docs.python.org/reference/simple_stmts.html#the-yield-statement>`_ to
+return a series of values when iterated over [#]_.
+
+A generator method / function is called to return the generator object. It is
+the generator object that is then iterated over. The protocol method for
+iteration is `__iter__
+<http://docs.python.org/library/stdtypes.html#container.__iter__>`_, so we can
+mock this using a `MagicMock`.
+
+Here's an example class with an "iter" method implemented as a generator:
+
+ >>> class Foo(object):
+ ... def iter(self):
+ ... for i in [1, 2, 3]:
+ ... yield i
+ ...
+ >>> foo = Foo()
+ >>> list(foo.iter())
+ [1, 2, 3]
+
+
+How would we mock this class, and in particular its "iter" method?
+
+To configure the values returned from the iteration (implicit in the call to
+`list`), we need to configure the object returned by the call to `foo.iter()`.
+
+ >>> mock_foo = MagicMock()
+ >>> mock_foo.iter.return_value = iter([1, 2, 3])
+ >>> list(mock_foo.iter())
+ [1, 2, 3]
+
+.. [#] There are also generator expressions and more `advanced uses
+ <http://www.dabeaz.com/coroutines/index.html>`_ of generators, but we aren't
+ concerned about them here. A very good introduction to generators and how
+ powerful they are is: `Generator Tricks for Systems Programmers
+ <http://www.dabeaz.com/generators/>`_.
+
+
+Applying the same patch to every test method
+--------------------------------------------
+
+If you want several patches in place for multiple test methods the obvious way
+is to apply the patch decorators to every method. This can feel like unnecessary
+repetition. For Python 2.6 or more recent you can use `patch` (in all its
+various forms) as a class decorator. This applies the patches to all test
+methods on the class. Test methods are identified by names that start
+with `test`:
+
+ >>> @patch('mymodule.SomeClass')
+ ... class MyTest(TestCase):
+ ...
+ ... def test_one(self, MockSomeClass):
+ ... self.assertTrue(mymodule.SomeClass is MockSomeClass)
+ ...
+ ... def test_two(self, MockSomeClass):
+ ... self.assertTrue(mymodule.SomeClass is MockSomeClass)
+ ...
+ ... def not_a_test(self):
+ ... return 'something'
+ ...
+ >>> MyTest('test_one').test_one()
+ >>> MyTest('test_two').test_two()
+ >>> MyTest('test_two').not_a_test()
+ 'something'
+
+An alternative way of managing patches is to use the :ref:`start-and-stop`.
+These allow you to move the patching into your `setUp` and `tearDown` methods.
+
+ >>> class MyTest(TestCase):
+ ... def setUp(self):
+ ... self.patcher = patch('mymodule.foo')
+ ... self.mock_foo = self.patcher.start()
+ ...
+ ... def test_foo(self):
+ ... self.assertTrue(mymodule.foo is self.mock_foo)
+ ...
+ ... def tearDown(self):
+ ... self.patcher.stop()
+ ...
+ >>> MyTest('test_foo').run()
+
+If you use this technique you must ensure that the patching is "undone" by
+calling `stop`. This can be fiddlier than you might think, because if an
+exception is raised in the setUp then tearDown is not called.
+:meth:`unittest.TestCase.addCleanup` makes this easier:
+
+ >>> class MyTest(TestCase):
+ ... def setUp(self):
+ ... patcher = patch('mymodule.foo')
+ ... self.addCleanup(patcher.stop)
+ ... self.mock_foo = patcher.start()
+ ...
+ ... def test_foo(self):
+ ... self.assertTrue(mymodule.foo is self.mock_foo)
+ ...
+ >>> MyTest('test_foo').run()
+
+
+Mocking Unbound Methods
+-----------------------
+
+Whilst writing tests today I needed to patch an *unbound method* (patching the
+method on the class rather than on the instance). I needed self to be passed
+in as the first argument because I want to make asserts about which objects
+were calling this particular method. The issue is that you can't patch with a
+mock for this, because if you replace an unbound method with a mock it doesn't
+become a bound method when fetched from the instance, and so it doesn't get
+self passed in. The workaround is to patch the unbound method with a real
+function instead. The :func:`patch` decorator makes it so simple to
+patch out methods with a mock that having to create a real function becomes a
+nuisance.
+
+If you pass `autospec=True` to patch then it does the patching with a
+*real* function object. This function object has the same signature as the one
+it is replacing, but delegates to a mock under the hood. You still get your
+mock auto-created in exactly the same way as before. What it means though, is
+that if you use it to patch out an unbound method on a class the mocked
+function will be turned into a bound method if it is fetched from an instance.
+It will have `self` passed in as the first argument, which is exactly what I
+wanted:
+
+ >>> class Foo(object):
+ ... def foo(self):
+ ... pass
+ ...
+ >>> with patch.object(Foo, 'foo', autospec=True) as mock_foo:
+ ... mock_foo.return_value = 'foo'
+ ... foo = Foo()
+ ... foo.foo()
+ ...
+ 'foo'
+ >>> mock_foo.assert_called_once_with(foo)
+
+If we don't use `autospec=True` then the unbound method is patched out
+with a Mock instance instead, and isn't called with `self`.
+
+
+Checking multiple calls with mock
+---------------------------------
+
+mock has a nice API for making assertions about how your mock objects are used.
+
+ >>> mock = Mock()
+ >>> mock.foo_bar.return_value = None
+ >>> mock.foo_bar('baz', spam='eggs')
+ >>> mock.foo_bar.assert_called_with('baz', spam='eggs')
+
+If your mock is only being called once you can use the
+:meth:`assert_called_once_with` method that also asserts that the
+:attr:`call_count` is one.
+
+ >>> mock.foo_bar.assert_called_once_with('baz', spam='eggs')
+ >>> mock.foo_bar()
+ >>> mock.foo_bar.assert_called_once_with('baz', spam='eggs')
+ Traceback (most recent call last):
+ ...
+ AssertionError: Expected to be called once. Called 2 times.
+
+Both `assert_called_with` and `assert_called_once_with` make assertions about
+the *most recent* call. If your mock is going to be called several times, and
+you want to make assertions about *all* those calls you can use
+:attr:`~Mock.call_args_list`:
+
+ >>> mock = Mock(return_value=None)
+ >>> mock(1, 2, 3)
+ >>> mock(4, 5, 6)
+ >>> mock()
+ >>> mock.call_args_list
+ [call(1, 2, 3), call(4, 5, 6), call()]
+
+The :data:`call` helper makes it easy to make assertions about these calls. You
+can build up a list of expected calls and compare it to `call_args_list`. This
+looks remarkably similar to the repr of the `call_args_list`:
+
+ >>> expected = [call(1, 2, 3), call(4, 5, 6), call()]
+ >>> mock.call_args_list == expected
+ True
+
+
+Coping with mutable arguments
+-----------------------------
+
+Another situation that is rare, but can bite you, is when your mock is called with
+mutable arguments. `call_args` and `call_args_list` store *references* to the
+arguments. If the arguments are mutated by the code under test then you can no
+longer make assertions about what the values were when the mock was called.
+
+Here's some example code that shows the problem. Imagine the following functions
+defined in 'mymodule'::
+
+ def frob(val):
+ pass
+
+ def grob(val):
+ "First frob and then clear val"
+ frob(val)
+ val.clear()
+
+When we try to test that `grob` calls `frob` with the correct argument look
+what happens:
+
+ >>> with patch('mymodule.frob') as mock_frob:
+ ... val = set([6])
+ ... mymodule.grob(val)
+ ...
+ >>> val
+ set([])
+ >>> mock_frob.assert_called_with(set([6]))
+ Traceback (most recent call last):
+ ...
+ AssertionError: Expected: ((set([6]),), {})
+ Called with: ((set([]),), {})
+
+One possibility would be for mock to copy the arguments you pass in. This
+could then cause problems if you do assertions that rely on object identity
+for equality.
+
+Here's one solution that uses the :attr:`side_effect`
+functionality. If you provide a `side_effect` function for a mock then
+`side_effect` will be called with the same args as the mock. This gives us an
+opportunity to copy the arguments and store them for later assertions. In this
+example I'm using *another* mock to store the arguments so that I can use the
+mock methods for doing the assertion. Again a helper function sets this up for
+me.
+
+ >>> from copy import deepcopy
+ >>> from unittest.mock import Mock, patch, DEFAULT
+ >>> def copy_call_args(mock):
+ ... new_mock = Mock()
+ ... def side_effect(*args, **kwargs):
+ ... args = deepcopy(args)
+ ... kwargs = deepcopy(kwargs)
+ ... new_mock(*args, **kwargs)
+ ... return DEFAULT
+ ... mock.side_effect = side_effect
+ ... return new_mock
+ ...
+ >>> with patch('mymodule.frob') as mock_frob:
+ ... new_mock = copy_call_args(mock_frob)
+ ... val = set([6])
+ ... mymodule.grob(val)
+ ...
+ >>> new_mock.assert_called_with(set([6]))
+ >>> new_mock.call_args
+ call(set([6]))
+
+`copy_call_args` is called with the mock that will be called. It returns a new
+mock that we do the assertion on. The `side_effect` function makes a copy of
+the args and calls our `new_mock` with the copy.
+
+.. note::
+
+ If your mock is only going to be used once there is an easier way of
+ checking arguments at the point they are called. You can simply do the
+ checking inside a `side_effect` function.
+
+ >>> def side_effect(arg):
+ ... assert arg == set([6])
+ ...
+ >>> mock = Mock(side_effect=side_effect)
+ >>> mock(set([6]))
+ >>> mock(set())
+ Traceback (most recent call last):
+ ...
+ AssertionError
+
+An alternative approach is to create a subclass of `Mock` or `MagicMock` that
+copies (using :func:`copy.deepcopy`) the arguments.
+Here's an example implementation:
+
+ >>> from copy import deepcopy
+ >>> class CopyingMock(MagicMock):
+ ... def __call__(self, *args, **kwargs):
+ ... args = deepcopy(args)
+ ... kwargs = deepcopy(kwargs)
+ ... return super(CopyingMock, self).__call__(*args, **kwargs)
+ ...
+ >>> c = CopyingMock(return_value=None)
+ >>> arg = set()
+ >>> c(arg)
+ >>> arg.add(1)
+ >>> c.assert_called_with(set())
+ >>> c.assert_called_with(arg)
+ Traceback (most recent call last):
+ ...
+ AssertionError: Expected call: mock(set([1]))
+ Actual call: mock(set([]))
+ >>> c.foo
+ <CopyingMock name='mock.foo' id='...'>
+
+When you subclass `Mock` or `MagicMock` all dynamically created attributes,
+and the `return_value` will use your subclass automatically. That means all
+children of a `CopyingMock` will also have the type `CopyingMock`.
+
+
+Nesting Patches
+---------------
+
+Using patch as a context manager is nice, but if you do multiple patches you
+can end up with nested with statements indenting further and further to the
+right:
+
+ >>> class MyTest(TestCase):
+ ...
+ ... def test_foo(self):
+ ... with patch('mymodule.Foo') as mock_foo:
+ ... with patch('mymodule.Bar') as mock_bar:
+ ... with patch('mymodule.Spam') as mock_spam:
+ ... assert mymodule.Foo is mock_foo
+ ... assert mymodule.Bar is mock_bar
+ ... assert mymodule.Spam is mock_spam
+ ...
+ >>> original = mymodule.Foo
+ >>> MyTest('test_foo').test_foo()
+ >>> assert mymodule.Foo is original
+
+With unittest `cleanup` functions and the :ref:`start-and-stop` we can
+achieve the same effect without the nested indentation. A simple helper
+method, `create_patch`, puts the patch in place and returns the created mock
+for us:
+
+ >>> class MyTest(TestCase):
+ ...
+ ... def create_patch(self, name):
+ ... patcher = patch(name)
+ ... thing = patcher.start()
+ ... self.addCleanup(patcher.stop)
+ ... return thing
+ ...
+ ... def test_foo(self):
+ ... mock_foo = self.create_patch('mymodule.Foo')
+ ... mock_bar = self.create_patch('mymodule.Bar')
+ ... mock_spam = self.create_patch('mymodule.Spam')
+ ...
+ ... assert mymodule.Foo is mock_foo
+ ... assert mymodule.Bar is mock_bar
+ ... assert mymodule.Spam is mock_spam
+ ...
+ >>> original = mymodule.Foo
+ >>> MyTest('test_foo').run()
+ >>> assert mymodule.Foo is original
+
+
+Mocking a dictionary with MagicMock
+-----------------------------------
+
+You may want to mock a dictionary, or other container object, recording all
+access to it whilst having it still behave like a dictionary.
+
+We can do this with :class:`MagicMock`, which will behave like a dictionary,
+and using :data:`~Mock.side_effect` to delegate dictionary access to a real
+underlying dictionary that is under our control.
+
+When the `__getitem__` and `__setitem__` methods of our `MagicMock` are called
+(normal dictionary access) then `side_effect` is called with the key (and in
+the case of `__setitem__` the value too). We can also control what is returned.
+
+After the `MagicMock` has been used we can use attributes like
+:data:`~Mock.call_args_list` to assert about how the dictionary was used:
+
+ >>> my_dict = {'a': 1, 'b': 2, 'c': 3}
+ >>> def getitem(name):
+ ... return my_dict[name]
+ ...
+ >>> def setitem(name, val):
+ ... my_dict[name] = val
+ ...
+ >>> mock = MagicMock()
+ >>> mock.__getitem__.side_effect = getitem
+ >>> mock.__setitem__.side_effect = setitem
+
+.. note::
+
+ An alternative to using `MagicMock` is to use `Mock` and *only* provide
+ the magic methods you specifically want:
+
+ >>> mock = Mock()
+ >>> mock.__setitem__ = Mock(side_effect=setitem)
+ >>> mock.__getitem__ = Mock(side_effect=getitem)
+
+ A *third* option is to use `MagicMock` but passing in `dict` as the `spec`
+ (or `spec_set`) argument so that the `MagicMock` created only has
+ dictionary magic methods available:
+
+ >>> mock = MagicMock(spec_set=dict)
+ >>> mock.__getitem__.side_effect = getitem
+ >>> mock.__setitem__.side_effect = setitem
+
+With these side effect functions in place, the `mock` will behave like a normal
+dictionary but recording the access. It even raises a `KeyError` if you try
+to access a key that doesn't exist.
+
+ >>> mock['a']
+ 1
+ >>> mock['c']
+ 3
+ >>> mock['d']
+ Traceback (most recent call last):
+ ...
+ KeyError: 'd'
+ >>> mock['b'] = 'fish'
+ >>> mock['d'] = 'eggs'
+ >>> mock['b']
+ 'fish'
+ >>> mock['d']
+ 'eggs'
+
+After it has been used you can make assertions about the access using the normal
+mock methods and attributes:
+
+ >>> mock.__getitem__.call_args_list
+ [call('a'), call('c'), call('d'), call('b'), call('d')]
+ >>> mock.__setitem__.call_args_list
+ [call('b', 'fish'), call('d', 'eggs')]
+ >>> my_dict
+ {'a': 1, 'c': 3, 'b': 'fish', 'd': 'eggs'}
+
+
+Mock subclasses and their attributes
+------------------------------------
+
+There are various reasons why you might want to subclass `Mock`. One reason
+might be to add helper methods. Here's a silly example:
+
+ >>> class MyMock(MagicMock):
+ ... def has_been_called(self):
+ ... return self.called
+ ...
+ >>> mymock = MyMock(return_value=None)
+ >>> mymock
+ <MyMock id='...'>
+ >>> mymock.has_been_called()
+ False
+ >>> mymock()
+ >>> mymock.has_been_called()
+ True
+
+The standard behaviour for `Mock` instances is that attributes and the return
+value mocks are of the same type as the mock they are accessed on. This ensures
+that `Mock` attributes are `Mocks` and `MagicMock` attributes are `MagicMocks`
+[#]_. So if you're subclassing to add helper methods then they'll also be
+available on the attributes and return value mock of instances of your
+subclass.
+
+ >>> mymock.foo
+ <MyMock name='mock.foo' id='...'>
+ >>> mymock.foo.has_been_called()
+ False
+ >>> mymock.foo()
+ <MyMock name='mock.foo()' id='...'>
+ >>> mymock.foo.has_been_called()
+ True
+
+Sometimes this is inconvenient. For example, `one user
+<https://code.google.com/p/mock/issues/detail?id=105>`_ is subclassing mock to
+create a `Twisted adaptor
+<http://twistedmatrix.com/documents/11.0.0/api/twisted.python.components.html>`_.
+Having this applied to attributes too actually causes errors.
+
+`Mock` (in all its flavours) uses a method called `_get_child_mock` to create
+these "sub-mocks" for attributes and return values. You can prevent your
+subclass being used for attributes by overriding this method. The signature is
+that it takes arbitrary keyword arguments (`**kwargs`) which are then passed
+onto the mock constructor:
+
+ >>> class Subclass(MagicMock):
+ ... def _get_child_mock(self, **kwargs):
+ ... return MagicMock(**kwargs)
+ ...
+ >>> mymock = Subclass()
+ >>> mymock.foo
+ <MagicMock name='mock.foo' id='...'>
+ >>> assert isinstance(mymock, Subclass)
+ >>> assert not isinstance(mymock.foo, Subclass)
+ >>> assert not isinstance(mymock(), Subclass)
+
+.. [#] An exception to this rule are the non-callable mocks. Attributes use the
+ callable variant because otherwise non-callable mocks couldn't have callable
+ methods.
+
+
+Mocking imports with patch.dict
+-------------------------------
+
+One situation where mocking can be hard is where you have a local import inside
+a function. These are harder to mock because they aren't using an object from
+the module namespace that we can patch out.
+
+Generally local imports are to be avoided. They are sometimes done to prevent
+circular dependencies, for which there is *usually* a much better way to solve
+the problem (refactor the code) or to prevent "up front costs" by delaying the
+import. This can also be solved in better ways than an unconditional local
+import (store the module as a class or module attribute and only do the import
+on first use).
+
+That aside there is a way to use `mock` to affect the results of an import.
+Importing fetches an *object* from the `sys.modules` dictionary. Note that it
+fetches an *object*, which need not be a module. Importing a module for the
+first time results in a module object being put in `sys.modules`, so usually
+when you import something you get a module back. This need not be the case
+however.
+
+This means you can use :func:`patch.dict` to *temporarily* put a mock in place
+in `sys.modules`. Any imports whilst this patch is active will fetch the mock.
+When the patch is complete (the decorated function exits, the with statement
+body is complete or `patcher.stop()` is called) then whatever was there
+previously will be restored safely.
+
+Here's an example that mocks out the 'fooble' module.
+
+ >>> mock = Mock()
+ >>> with patch.dict('sys.modules', {'fooble': mock}):
+ ... import fooble
+ ... fooble.blob()
+ ...
+ <Mock name='mock.blob()' id='...'>
+ >>> assert 'fooble' not in sys.modules
+ >>> mock.blob.assert_called_once_with()
+
+As you can see the `import fooble` succeeds, but on exit there is no 'fooble'
+left in `sys.modules`.
+
+This also works for the `from module import name` form:
+
+ >>> mock = Mock()
+ >>> with patch.dict('sys.modules', {'fooble': mock}):
+ ... from fooble import blob
+ ... blob.blip()
+ ...
+ <Mock name='mock.blob.blip()' id='...'>
+ >>> mock.blob.blip.assert_called_once_with()
+
+With slightly more work you can also mock package imports:
+
+ >>> mock = Mock()
+ >>> modules = {'package': mock, 'package.module': mock.module}
+ >>> with patch.dict('sys.modules', modules):
+ ... from package.module import fooble
+ ... fooble()
+ ...
+ <Mock name='mock.module.fooble()' id='...'>
+ >>> mock.module.fooble.assert_called_once_with()
+
+
+Tracking order of calls and less verbose call assertions
+--------------------------------------------------------
+
+The :class:`Mock` class allows you to track the *order* of method calls on
+your mock objects through the :attr:`~Mock.method_calls` attribute. This
+doesn't allow you to track the order of calls between separate mock objects,
+however we can use :attr:`~Mock.mock_calls` to achieve the same effect.
+
+Because mocks track calls to child mocks in `mock_calls`, and accessing an
+arbitrary attribute of a mock creates a child mock, we can create our separate
+mocks from a parent one. Calls to those child mocks will then all be recorded,
+in order, in the `mock_calls` of the parent:
+
+ >>> manager = Mock()
+ >>> mock_foo = manager.foo
+ >>> mock_bar = manager.bar
+
+ >>> mock_foo.something()
+ <Mock name='mock.foo.something()' id='...'>
+ >>> mock_bar.other.thing()
+ <Mock name='mock.bar.other.thing()' id='...'>
+
+ >>> manager.mock_calls
+ [call.foo.something(), call.bar.other.thing()]
+
+We can then assert about the calls, including the order, by comparing with
+the `mock_calls` attribute on the manager mock:
+
+ >>> expected_calls = [call.foo.something(), call.bar.other.thing()]
+ >>> manager.mock_calls == expected_calls
+ True
+
+If `patch` is creating, and putting in place, your mocks then you can attach
+them to a manager mock using the :meth:`~Mock.attach_mock` method. After
+attaching calls will be recorded in `mock_calls` of the manager.
+
+ >>> manager = MagicMock()
+ >>> with patch('mymodule.Class1') as MockClass1:
+ ... with patch('mymodule.Class2') as MockClass2:
+ ... manager.attach_mock(MockClass1, 'MockClass1')
+ ... manager.attach_mock(MockClass2, 'MockClass2')
+ ... MockClass1().foo()
+ ... MockClass2().bar()
+ ...
+ <MagicMock name='mock.MockClass1().foo()' id='...'>
+ <MagicMock name='mock.MockClass2().bar()' id='...'>
+ >>> manager.mock_calls
+ [call.MockClass1(),
+ call.MockClass1().foo(),
+ call.MockClass2(),
+ call.MockClass2().bar()]
+
+If many calls have been made, but you're only interested in a particular
+sequence of them then an alternative is to use the
+:meth:`~Mock.assert_has_calls` method. This takes a list of calls (constructed
+with the :data:`call` object). If that sequence of calls is in
+:attr:`~Mock.mock_calls` then the assert succeeds.
+
+ >>> m = MagicMock()
+ >>> m().foo().bar().baz()
+ <MagicMock name='mock().foo().bar().baz()' id='...'>
+ >>> m.one().two().three()
+ <MagicMock name='mock.one().two().three()' id='...'>
+ >>> calls = call.one().two().three().call_list()
+ >>> m.assert_has_calls(calls)
+
+Even though the chained call `m.one().two().three()` isn't the only call that
+has been made to the mock, the assert still succeeds.
+
+Sometimes a mock may have several calls made to it, and you are only interested
+in asserting about *some* of those calls. You may not even care about the
+order. In this case you can pass `any_order=True` to `assert_has_calls`:
+
+ >>> m = MagicMock()
+ >>> m(1), m.two(2, 3), m.seven(7), m.fifty('50')
+ (...)
+ >>> calls = [call.fifty('50'), call(1), call.seven(7)]
+ >>> m.assert_has_calls(calls, any_order=True)
+
+
+More complex argument matching
+------------------------------
+
+Using the same basic concept as :data:`ANY` we can implement matchers to do more
+complex assertions on objects used as arguments to mocks.
+
+Suppose we expect some object to be passed to a mock that by default
+compares equal based on object identity (which is the Python default for user
+defined classes). To use :meth:`~Mock.assert_called_with` we would need to pass
+in the exact same object. If we are only interested in some of the attributes
+of this object then we can create a matcher that will check these attributes
+for us.
+
+You can see in this example how a 'standard' call to `assert_called_with` isn't
+sufficient:
+
+ >>> class Foo(object):
+ ... def __init__(self, a, b):
+ ... self.a, self.b = a, b
+ ...
+ >>> mock = Mock(return_value=None)
+ >>> mock(Foo(1, 2))
+ >>> mock.assert_called_with(Foo(1, 2))
+ Traceback (most recent call last):
+ ...
+ AssertionError: Expected: call(<__main__.Foo object at 0x...>)
+ Actual call: call(<__main__.Foo object at 0x...>)
+
+A comparison function for our `Foo` class might look something like this:
+
+ >>> def compare(self, other):
+ ... if not type(self) == type(other):
+ ... return False
+ ... if self.a != other.a:
+ ... return False
+ ... if self.b != other.b:
+ ... return False
+ ... return True
+ ...
+
+And a matcher object that can use comparison functions like this for its
+equality operation would look something like this:
+
+ >>> class Matcher(object):
+ ... def __init__(self, compare, some_obj):
+ ... self.compare = compare
+ ... self.some_obj = some_obj
+ ... def __eq__(self, other):
+ ... return self.compare(self.some_obj, other)
+ ...
+
+Putting all this together:
+
+ >>> match_foo = Matcher(compare, Foo(1, 2))
+ >>> mock.assert_called_with(match_foo)
+
+The `Matcher` is instantiated with our compare function and the `Foo` object
+we want to compare against. In `assert_called_with` the `Matcher` equality
+method will be called, which compares the object the mock was called with
+against the one we created our matcher with. If they match then
+`assert_called_with` passes, and if they don't an `AssertionError` is raised:
+
+ >>> match_wrong = Matcher(compare, Foo(3, 4))
+ >>> mock.assert_called_with(match_wrong)
+ Traceback (most recent call last):
+ ...
+ AssertionError: Expected: ((<Matcher object at 0x...>,), {})
+ Called with: ((<Foo object at 0x...>,), {})
+
+With a bit of tweaking you could have the comparison function raise the
+`AssertionError` directly and provide a more useful failure message.
+
+As of version 1.5, the Python testing library `PyHamcrest
+<http://pypi.python.org/pypi/PyHamcrest>`_ provides similar functionality,
+that may be useful here, in the form of its equality matcher
+(`hamcrest.library.integration.match_equality
+<http://packages.python.org/PyHamcrest/integration.html#hamcrest.library.integration.match_equality>`_).
diff --git a/Doc/library/unittest.mock.rst b/Doc/library/unittest.mock.rst
new file mode 100644
index 0000000..3e50031
--- /dev/null
+++ b/Doc/library/unittest.mock.rst
@@ -0,0 +1,2225 @@
+:mod:`unittest.mock` --- mock object library
+============================================
+
+.. module:: unittest.mock
+ :synopsis: Mock object library.
+.. moduleauthor:: Michael Foord <michael@python.org>
+.. currentmodule:: unittest.mock
+
+.. versionadded:: 3.3
+
+:mod:`unittest.mock` is a library for testing in Python. It allows you to
+replace parts of your system under test with mock objects and make assertions
+about how they have been used.
+
+`unittest.mock` provides a core :class:`Mock` class removing the need to
+create a host of stubs throughout your test suite. After performing an
+action, you can make assertions about which methods / attributes were used
+and arguments they were called with. You can also specify return values and
+set needed attributes in the normal way.
+
+Additionally, mock provides a :func:`patch` decorator that handles patching
+module and class level attributes within the scope of a test, along with
+:const:`sentinel` for creating unique objects. See the `quick guide`_ for
+some examples of how to use :class:`Mock`, :class:`MagicMock` and
+:func:`patch`.
+
+Mock is very easy to use and is designed for use with :mod:`unittest`. Mock
+is based on the 'action -> assertion' pattern instead of `'record -> replay'`
+used by many mocking frameworks.
+
+There is a backport of `unittest.mock` for earlier versions of Python,
+available as `mock on PyPI <http://pypi.python.org/pypi/mock>`_.
+
+**Source code:** :source:`Lib/unittest/mock.py`
+
+
+Quick Guide
+-----------
+
+:class:`Mock` and :class:`MagicMock` objects create all attributes and
+methods as you access them and store details of how they have been used. You
+can configure them, to specify return values or limit what attributes are
+available, and then make assertions about how they have been used:
+
+ >>> from unittest.mock import MagicMock
+ >>> thing = ProductionClass()
+ >>> thing.method = MagicMock(return_value=3)
+ >>> thing.method(3, 4, 5, key='value')
+ 3
+ >>> thing.method.assert_called_with(3, 4, 5, key='value')
+
+:attr:`side_effect` allows you to perform side effects, including raising an
+exception when a mock is called:
+
+ >>> mock = Mock(side_effect=KeyError('foo'))
+ >>> mock()
+ Traceback (most recent call last):
+ ...
+ KeyError: 'foo'
+
+ >>> values = {'a': 1, 'b': 2, 'c': 3}
+ >>> def side_effect(arg):
+ ... return values[arg]
+ ...
+ >>> mock.side_effect = side_effect
+ >>> mock('a'), mock('b'), mock('c')
+ (1, 2, 3)
+ >>> mock.side_effect = [5, 4, 3, 2, 1]
+ >>> mock(), mock(), mock()
+ (5, 4, 3)
+
+Mock has many other ways you can configure it and control its behaviour. For
+example the `spec` argument configures the mock to take its specification
+from another object. Attempting to access attributes or methods on the mock
+that don't exist on the spec will fail with an `AttributeError`.
+
+The :func:`patch` decorator / context manager makes it easy to mock classes or
+objects in a module under test. The object you specify will be replaced with a
+mock (or other object) during the test and restored when the test ends:
+
+ >>> from unittest.mock import patch
+ >>> @patch('module.ClassName2')
+ ... @patch('module.ClassName1')
+ ... def test(MockClass1, MockClass2):
+ ... module.ClassName1()
+ ... module.ClassName2()
+ ... assert MockClass1 is module.ClassName1
+ ... assert MockClass2 is module.ClassName2
+ ... assert MockClass1.called
+ ... assert MockClass2.called
+ ...
+ >>> test()
+
+.. note::
+
+ When you nest patch decorators the mocks are passed in to the decorated
+ function in the same order they are applied (the normal *python* order that
+ decorators are applied). This means from the bottom up, so in the example
+ above the mock for `module.ClassName1` is passed in first.
+
+ With `patch` it matters that you patch objects in the namespace where they
+ are looked up. This is normally straightforward, but for a quick guide
+ read :ref:`where to patch <where-to-patch>`.
+
+As well as a decorator `patch` can be used as a context manager in a with
+statement:
+
+ >>> with patch.object(ProductionClass, 'method', return_value=None) as mock_method:
+ ... thing = ProductionClass()
+ ... thing.method(1, 2, 3)
+ ...
+ >>> mock_method.assert_called_once_with(1, 2, 3)
+
+
+There is also :func:`patch.dict` for setting values in a dictionary just
+during a scope and restoring the dictionary to its original state when the test
+ends:
+
+ >>> foo = {'key': 'value'}
+ >>> original = foo.copy()
+ >>> with patch.dict(foo, {'newkey': 'newvalue'}, clear=True):
+ ... assert foo == {'newkey': 'newvalue'}
+ ...
+ >>> assert foo == original
+
+Mock supports the mocking of Python :ref:`magic methods <magic-methods>`. The
+easiest way of using magic methods is with the :class:`MagicMock` class. It
+allows you to do things like:
+
+ >>> mock = MagicMock()
+ >>> mock.__str__.return_value = 'foobarbaz'
+ >>> str(mock)
+ 'foobarbaz'
+ >>> mock.__str__.assert_called_with()
+
+Mock allows you to assign functions (or other Mock instances) to magic methods
+and they will be called appropriately. The `MagicMock` class is just a Mock
+variant that has all of the magic methods pre-created for you (well, all the
+useful ones anyway).
+
+The following is an example of using magic methods with the ordinary Mock
+class:
+
+ >>> mock = Mock()
+ >>> mock.__str__ = Mock(return_value='wheeeeee')
+ >>> str(mock)
+ 'wheeeeee'
+
+For ensuring that the mock objects in your tests have the same api as the
+objects they are replacing, you can use :ref:`auto-speccing <auto-speccing>`.
+Auto-speccing can be done through the `autospec` argument to patch, or the
+:func:`create_autospec` function. Auto-speccing creates mock objects that
+have the same attributes and methods as the objects they are replacing, and
+any functions and methods (including constructors) have the same call
+signature as the real object.
+
+This ensures that your mocks will fail in the same way as your production
+code if they are used incorrectly:
+
+ >>> from unittest.mock import create_autospec
+ >>> def function(a, b, c):
+ ... pass
+ ...
+ >>> mock_function = create_autospec(function, return_value='fishy')
+ >>> mock_function(1, 2, 3)
+ 'fishy'
+ >>> mock_function.assert_called_once_with(1, 2, 3)
+ >>> mock_function('wrong arguments')
+ Traceback (most recent call last):
+ ...
+ TypeError: <lambda>() takes exactly 3 arguments (1 given)
+
+`create_autospec` can also be used on classes, where it copies the signature of
+the `__init__` method, and on callable objects where it copies the signature of
+the `__call__` method.
+
+
+
+The Mock Class
+--------------
+
+
+`Mock` is a flexible mock object intended to replace the use of stubs and
+test doubles throughout your code. Mocks are callable and create attributes as
+new mocks when you access them [#]_. Accessing the same attribute will always
+return the same mock. Mocks record how you use them, allowing you to make
+assertions about what your code has done to them.
+
+:class:`MagicMock` is a subclass of `Mock` with all the magic methods
+pre-created and ready to use. There are also non-callable variants, useful
+when you are mocking out objects that aren't callable:
+:class:`NonCallableMock` and :class:`NonCallableMagicMock`
+
+The :func:`patch` decorators make it easy to temporarily replace classes
+in a particular module with a `Mock` object. By default `patch` will create
+a `MagicMock` for you. You can specify an alternative class of `Mock` using
+the `new_callable` argument to `patch`.
+
+
+.. class:: Mock(spec=None, side_effect=None, return_value=DEFAULT, wraps=None, name=None, spec_set=None, **kwargs)
+
+ Create a new `Mock` object. `Mock` takes several optional arguments
+ that specify the behaviour of the Mock object:
+
+ * `spec`: This can be either a list of strings or an existing object (a
+ class or instance) that acts as the specification for the mock object. If
+ you pass in an object then a list of strings is formed by calling dir on
+ the object (excluding unsupported magic attributes and methods).
+ Accessing any attribute not in this list will raise an `AttributeError`.
+
+ If `spec` is an object (rather than a list of strings) then
+ :attr:`__class__` returns the class of the spec object. This allows mocks
+ to pass `isinstance` tests.
+
+ * `spec_set`: A stricter variant of `spec`. If used, attempting to *set*
+ or get an attribute on the mock that isn't on the object passed as
+ `spec_set` will raise an `AttributeError`.
+
+ * `side_effect`: A function to be called whenever the Mock is called. See
+ the :attr:`~Mock.side_effect` attribute. Useful for raising exceptions or
+ dynamically changing return values. The function is called with the same
+ arguments as the mock, and unless it returns :data:`DEFAULT`, the return
+ value of this function is used as the return value.
+
+ Alternatively `side_effect` can be an exception class or instance. In
+ this case the exception will be raised when the mock is called.
+
+ If `side_effect` is an iterable then each call to the mock will return
+ the next value from the iterable.
+
+ A `side_effect` can be cleared by setting it to `None`.
+
+ * `return_value`: The value returned when the mock is called. By default
+ this is a new Mock (created on first access). See the
+ :attr:`return_value` attribute.
+
+ * `wraps`: Item for the mock object to wrap. If `wraps` is not None then
+ calling the Mock will pass the call through to the wrapped object
+ (returning the real result). Attribute access on the mock will return a
+ Mock object that wraps the corresponding attribute of the wrapped
+ object (so attempting to access an attribute that doesn't exist will
+ raise an `AttributeError`).
+
+ If the mock has an explicit `return_value` set then calls are not passed
+ to the wrapped object and the `return_value` is returned instead.
+
+ * `name`: If the mock has a name then it will be used in the repr of the
+ mock. This can be useful for debugging. The name is propagated to child
+ mocks.
+
+ Mocks can also be called with arbitrary keyword arguments. These will be
+ used to set attributes on the mock after it is created. See the
+ :meth:`configure_mock` method for details.
+
+
+ .. method:: assert_called_with(*args, **kwargs)
+
+ This method is a convenient way of asserting that calls are made in a
+ particular way:
+
+ >>> mock = Mock()
+ >>> mock.method(1, 2, 3, test='wow')
+ <Mock name='mock.method()' id='...'>
+ >>> mock.method.assert_called_with(1, 2, 3, test='wow')
+
+
+ .. method:: assert_called_once_with(*args, **kwargs)
+
+ Assert that the mock was called exactly once and with the specified
+ arguments.
+
+ >>> mock = Mock(return_value=None)
+ >>> mock('foo', bar='baz')
+ >>> mock.assert_called_once_with('foo', bar='baz')
+ >>> mock('foo', bar='baz')
+ >>> mock.assert_called_once_with('foo', bar='baz')
+ Traceback (most recent call last):
+ ...
+ AssertionError: Expected 'mock' to be called once. Called 2 times.
+
+
+ .. method:: assert_any_call(*args, **kwargs)
+
+ Assert the mock has been called with the specified arguments.
+
+ The assert passes if the mock has *ever* been called, unlike
+ :meth:`assert_called_with` and :meth:`assert_called_once_with` that
+ only pass if the call is the most recent one.
+
+ >>> mock = Mock(return_value=None)
+ >>> mock(1, 2, arg='thing')
+ >>> mock('some', 'thing', 'else')
+ >>> mock.assert_any_call(1, 2, arg='thing')
+
+
+ .. method:: assert_has_calls(calls, any_order=False)
+
+ Assert the mock has been called with the specified calls.
+ The `mock_calls` list is checked for the calls.
+
+ If `any_order` is False (the default) then the calls must be
+ sequential. There can be extra calls before or after the
+ specified calls.
+
+ If `any_order` is True then the calls can be in any order, but
+ they must all appear in :attr:`mock_calls`.
+
+ >>> mock = Mock(return_value=None)
+ >>> mock(1)
+ >>> mock(2)
+ >>> mock(3)
+ >>> mock(4)
+ >>> calls = [call(2), call(3)]
+ >>> mock.assert_has_calls(calls)
+ >>> calls = [call(4), call(2), call(3)]
+ >>> mock.assert_has_calls(calls, any_order=True)
+
+
+ .. method:: reset_mock()
+
+ The reset_mock method resets all the call attributes on a mock object:
+
+ >>> mock = Mock(return_value=None)
+ >>> mock('hello')
+ >>> mock.called
+ True
+ >>> mock.reset_mock()
+ >>> mock.called
+ False
+
+ This can be useful where you want to make a series of assertions that
+ reuse the same object. Note that `reset_mock` *doesn't* clear the
+ return value, :attr:`side_effect` or any child attributes you have
+ set using normal assignment. Child mocks and the return value mock
+ (if any) are reset as well.
+
+
+ .. method:: mock_add_spec(spec, spec_set=False)
+
+ Add a spec to a mock. `spec` can either be an object or a
+ list of strings. Only attributes on the `spec` can be fetched as
+ attributes from the mock.
+
+ If `spec_set` is `True` then only attributes on the spec can be set.
+
+
+ .. method:: attach_mock(mock, attribute)
+
+ Attach a mock as an attribute of this one, replacing its name and
+ parent. Calls to the attached mock will be recorded in the
+ :attr:`method_calls` and :attr:`mock_calls` attributes of this one.
+
+
+ .. method:: configure_mock(**kwargs)
+
+ Set attributes on the mock through keyword arguments.
+
+ Attributes plus return values and side effects can be set on child
+ mocks using standard dot notation and unpacking a dictionary in the
+ method call:
+
+ >>> mock = Mock()
+ >>> attrs = {'method.return_value': 3, 'other.side_effect': KeyError}
+ >>> mock.configure_mock(**attrs)
+ >>> mock.method()
+ 3
+ >>> mock.other()
+ Traceback (most recent call last):
+ ...
+ KeyError
+
+ The same thing can be achieved in the constructor call to mocks:
+
+ >>> attrs = {'method.return_value': 3, 'other.side_effect': KeyError}
+ >>> mock = Mock(some_attribute='eggs', **attrs)
+ >>> mock.some_attribute
+ 'eggs'
+ >>> mock.method()
+ 3
+ >>> mock.other()
+ Traceback (most recent call last):
+ ...
+ KeyError
+
+ `configure_mock` exists to make it easier to do configuration
+ after the mock has been created.
+
+
+ .. method:: __dir__()
+
+ `Mock` objects limit the results of `dir(some_mock)` to useful results.
+ For mocks with a `spec` this includes all the permitted attributes
+ for the mock.
+
+ See :data:`FILTER_DIR` for what this filtering does, and how to
+ switch it off.
+
+
+ .. method:: _get_child_mock(**kw)
+
+ Create the child mocks for attributes and return value.
+ By default child mocks will be the same type as the parent.
+ Subclasses of Mock may want to override this to customize the way
+ child mocks are made.
+
+ For non-callable mocks the callable variant will be used (rather than
+ any custom subclass).
+
+
+ .. attribute:: called
+
+ A boolean representing whether or not the mock object has been called:
+
+ >>> mock = Mock(return_value=None)
+ >>> mock.called
+ False
+ >>> mock()
+ >>> mock.called
+ True
+
+ .. attribute:: call_count
+
+ An integer telling you how many times the mock object has been called:
+
+ >>> mock = Mock(return_value=None)
+ >>> mock.call_count
+ 0
+ >>> mock()
+ >>> mock()
+ >>> mock.call_count
+ 2
+
+
+ .. attribute:: return_value
+
+ Set this to configure the value returned by calling the mock:
+
+ >>> mock = Mock()
+ >>> mock.return_value = 'fish'
+ >>> mock()
+ 'fish'
+
+ The default return value is a mock object and you can configure it in
+ the normal way:
+
+ >>> mock = Mock()
+ >>> mock.return_value.attribute = sentinel.Attribute
+ >>> mock.return_value()
+ <Mock name='mock()()' id='...'>
+ >>> mock.return_value.assert_called_with()
+
+ `return_value` can also be set in the constructor:
+
+ >>> mock = Mock(return_value=3)
+ >>> mock.return_value
+ 3
+ >>> mock()
+ 3
+
+
+ .. attribute:: side_effect
+
+ This can either be a function to be called when the mock is called,
+ or an exception (class or instance) to be raised.
+
+ If you pass in a function it will be called with the same arguments as the
+ mock and unless the function returns the :data:`DEFAULT` singleton the
+ call to the mock will then return whatever the function returns. If the
+ function returns :data:`DEFAULT` then the mock will return its normal
+ value (from the :attr:`return_value`).
+
+ An example of a mock that raises an exception (to test exception
+ handling of an API):
+
+ >>> mock = Mock()
+ >>> mock.side_effect = Exception('Boom!')
+ >>> mock()
+ Traceback (most recent call last):
+ ...
+ Exception: Boom!
+
+ Using `side_effect` to return a sequence of values:
+
+ >>> mock = Mock()
+ >>> mock.side_effect = [3, 2, 1]
+ >>> mock(), mock(), mock()
+ (3, 2, 1)
+
+ The `side_effect` function is called with the same arguments as the
+ mock (so it is wise for it to take arbitrary args and keyword
+ arguments) and whatever it returns is used as the return value for
+ the call. The exception is if `side_effect` returns :data:`DEFAULT`,
+ in which case the normal :attr:`return_value` is used.
+
+ >>> mock = Mock(return_value=3)
+ >>> def side_effect(*args, **kwargs):
+ ... return DEFAULT
+ ...
+ >>> mock.side_effect = side_effect
+ >>> mock()
+ 3
+
+ `side_effect` can be set in the constructor. Here's an example that
+ adds one to the value the mock is called with and returns it:
+
+ >>> side_effect = lambda value: value + 1
+ >>> mock = Mock(side_effect=side_effect)
+ >>> mock(3)
+ 4
+ >>> mock(-8)
+ -7
+
+ Setting `side_effect` to `None` clears it:
+
+ >>> m = Mock(side_effect=KeyError, return_value=3)
+ >>> m()
+ Traceback (most recent call last):
+ ...
+ KeyError
+ >>> m.side_effect = None
+ >>> m()
+ 3
+
+
+ .. attribute:: call_args
+
+ This is either `None` (if the mock hasn't been called), or the
+ arguments that the mock was last called with. This will be in the
+ form of a tuple: the first member is any ordered arguments the mock
+ was called with (or an empty tuple) and the second member is any
+ keyword arguments (or an empty dictionary).
+
+ >>> mock = Mock(return_value=None)
+ >>> print(mock.call_args)
+ None
+ >>> mock()
+ >>> mock.call_args
+ call()
+ >>> mock.call_args == ()
+ True
+ >>> mock(3, 4)
+ >>> mock.call_args
+ call(3, 4)
+ >>> mock.call_args == ((3, 4),)
+ True
+ >>> mock(3, 4, 5, key='fish', next='w00t!')
+ >>> mock.call_args
+ call(3, 4, 5, key='fish', next='w00t!')
+
+ `call_args`, along with members of the lists :attr:`call_args_list`,
+ :attr:`method_calls` and :attr:`mock_calls` are :data:`call` objects.
+ These are tuples, so they can be unpacked to get at the individual
+ arguments and make more complex assertions. See
+ :ref:`calls as tuples <calls-as-tuples>`.
+
+
+ .. attribute:: call_args_list
+
+ This is a list of all the calls made to the mock object in sequence
+ (so the length of the list is the number of times it has been
+ called). Before any calls have been made it is an empty list. The
+ :data:`call` object can be used for conveniently constructing lists of
+ calls to compare with `call_args_list`.
+
+ >>> mock = Mock(return_value=None)
+ >>> mock()
+ >>> mock(3, 4)
+ >>> mock(key='fish', next='w00t!')
+ >>> mock.call_args_list
+ [call(), call(3, 4), call(key='fish', next='w00t!')]
+ >>> expected = [(), ((3, 4),), ({'key': 'fish', 'next': 'w00t!'},)]
+ >>> mock.call_args_list == expected
+ True
+
+ Members of `call_args_list` are :data:`call` objects. These can be
+ unpacked as tuples to get at the individual arguments. See
+ :ref:`calls as tuples <calls-as-tuples>`.
+
+
+ .. attribute:: method_calls
+
+ As well as tracking calls to themselves, mocks also track calls to
+ methods and attributes, and *their* methods and attributes:
+
+ >>> mock = Mock()
+ >>> mock.method()
+ <Mock name='mock.method()' id='...'>
+ >>> mock.property.method.attribute()
+ <Mock name='mock.property.method.attribute()' id='...'>
+ >>> mock.method_calls
+ [call.method(), call.property.method.attribute()]
+
+ Members of `method_calls` are :data:`call` objects. These can be
+ unpacked as tuples to get at the individual arguments. See
+ :ref:`calls as tuples <calls-as-tuples>`.
+
+
+ .. attribute:: mock_calls
+
+ `mock_calls` records *all* calls to the mock object, its methods, magic
+ methods *and* return value mocks.
+
+ >>> mock = MagicMock()
+ >>> result = mock(1, 2, 3)
+ >>> mock.first(a=3)
+ <MagicMock name='mock.first()' id='...'>
+ >>> mock.second()
+ <MagicMock name='mock.second()' id='...'>
+ >>> int(mock)
+ 1
+ >>> result(1)
+ <MagicMock name='mock()()' id='...'>
+ >>> expected = [call(1, 2, 3), call.first(a=3), call.second(),
+ ... call.__int__(), call()(1)]
+ >>> mock.mock_calls == expected
+ True
+
+ Members of `mock_calls` are :data:`call` objects. These can be
+ unpacked as tuples to get at the individual arguments. See
+ :ref:`calls as tuples <calls-as-tuples>`.
+
+
+ .. attribute:: __class__
+
+ Normally the `__class__` attribute of an object will return its type.
+ For a mock object with a `spec` `__class__` returns the spec class
+ instead. This allows mock objects to pass `isinstance` tests for the
+ object they are replacing / masquerading as:
+
+ >>> mock = Mock(spec=3)
+ >>> isinstance(mock, int)
+ True
+
+ `__class__` is assignable to; this allows a mock to pass an
+ `isinstance` check without forcing you to use a spec:
+
+ >>> mock = Mock()
+ >>> mock.__class__ = dict
+ >>> isinstance(mock, dict)
+ True
+
+.. class:: NonCallableMock(spec=None, wraps=None, name=None, spec_set=None, **kwargs)
+
+ A non-callable version of `Mock`. The constructor parameters have the same
+ meaning as for `Mock`, with the exception of `return_value` and `side_effect`
+ which have no meaning on a non-callable mock.
+
+Mock objects that use a class or an instance as a `spec` or `spec_set` are able
+to pass `isinstance` tests:
+
+ >>> mock = Mock(spec=SomeClass)
+ >>> isinstance(mock, SomeClass)
+ True
+ >>> mock = Mock(spec_set=SomeClass())
+ >>> isinstance(mock, SomeClass)
+ True
+
+The `Mock` classes have support for mocking magic methods. See :ref:`magic
+methods <magic-methods>` for the full details.
+
+The mock classes and the :func:`patch` decorators all take arbitrary keyword
+arguments for configuration. For the `patch` decorators the keywords are
+passed to the constructor of the mock being created. The keyword arguments
+are for configuring attributes of the mock:
+
+ >>> m = MagicMock(attribute=3, other='fish')
+ >>> m.attribute
+ 3
+ >>> m.other
+ 'fish'
+
+The return value and side effect of child mocks can be set in the same way,
+using dotted notation. As you can't use dotted names directly in a call you
+have to create a dictionary and unpack it using `**`:
+
+ >>> attrs = {'method.return_value': 3, 'other.side_effect': KeyError}
+ >>> mock = Mock(some_attribute='eggs', **attrs)
+ >>> mock.some_attribute
+ 'eggs'
+ >>> mock.method()
+ 3
+ >>> mock.other()
+ Traceback (most recent call last):
+ ...
+ KeyError
+
+
+.. class:: PropertyMock(*args, **kwargs)
+
+ A mock intended to be used as a property, or other descriptor, on a class.
+ `PropertyMock` provides `__get__` and `__set__` methods so you can specify
+ a return value when it is fetched.
+
+ Fetching a `PropertyMock` instance from an object calls the mock, with
+ no args. Setting it calls the mock with the value being set.
+
+ >>> class Foo(object):
+ ... @property
+ ... def foo(self):
+ ... return 'something'
+ ... @foo.setter
+ ... def foo(self, value):
+ ... pass
+ ...
+ >>> with patch('__main__.Foo.foo', new_callable=PropertyMock) as mock_foo:
+ ... mock_foo.return_value = 'mockity-mock'
+ ... this_foo = Foo()
+ ... print(this_foo.foo)
+ ... this_foo.foo = 6
+ ...
+ mockity-mock
+ >>> mock_foo.mock_calls
+ [call(), call(6)]
+
+Because of the way mock attributes are stored you can't directly attach a
+`PropertyMock` to a mock object. Instead you can attach it to the mock type
+object::
+
+ >>> m = MagicMock()
+ >>> p = PropertyMock(return_value=3)
+ >>> type(m).foo = p
+ >>> m.foo
+ 3
+ >>> p.assert_called_once_with()
+
+
+Calling
+~~~~~~~
+
+Mock objects are callable. The call will return the value set as the
+:attr:`~Mock.return_value` attribute. The default return value is a new Mock
+object; it is created the first time the return value is accessed (either
+explicitly or by calling the Mock) - but it is stored and the same one
+returned each time.
+
+Calls made to the object will be recorded in the attributes
+like :attr:`~Mock.call_args` and :attr:`~Mock.call_args_list`.
+
+If :attr:`~Mock.side_effect` is set then it will be called after the call has
+been recorded, so if `side_effect` raises an exception the call is still
+recorded.
+
+The simplest way to make a mock raise an exception when called is to make
+:attr:`~Mock.side_effect` an exception class or instance:
+
+ >>> m = MagicMock(side_effect=IndexError)
+ >>> m(1, 2, 3)
+ Traceback (most recent call last):
+ ...
+ IndexError
+ >>> m.mock_calls
+ [call(1, 2, 3)]
+ >>> m.side_effect = KeyError('Bang!')
+ >>> m('two', 'three', 'four')
+ Traceback (most recent call last):
+ ...
+ KeyError: 'Bang!'
+ >>> m.mock_calls
+ [call(1, 2, 3), call('two', 'three', 'four')]
+
+If `side_effect` is a function then whatever that function returns is what
+calls to the mock return. The `side_effect` function is called with the
+same arguments as the mock. This allows you to vary the return value of the
+call dynamically, based on the input:
+
+ >>> def side_effect(value):
+ ... return value + 1
+ ...
+ >>> m = MagicMock(side_effect=side_effect)
+ >>> m(1)
+ 2
+ >>> m(2)
+ 3
+ >>> m.mock_calls
+ [call(1), call(2)]
+
+If you want the mock to still return the default return value (a new mock), or
+any set return value, then there are two ways of doing this. Either return
+`mock.return_value` from inside `side_effect`, or return :data:`DEFAULT`:
+
+ >>> m = MagicMock()
+ >>> def side_effect(*args, **kwargs):
+ ... return m.return_value
+ ...
+ >>> m.side_effect = side_effect
+ >>> m.return_value = 3
+ >>> m()
+ 3
+ >>> def side_effect(*args, **kwargs):
+ ... return DEFAULT
+ ...
+ >>> m.side_effect = side_effect
+ >>> m()
+ 3
+
+To remove a `side_effect`, and return to the default behaviour, set the
+`side_effect` to `None`:
+
+ >>> m = MagicMock(return_value=6)
+ >>> def side_effect(*args, **kwargs):
+ ... return 3
+ ...
+ >>> m.side_effect = side_effect
+ >>> m()
+ 3
+ >>> m.side_effect = None
+ >>> m()
+ 6
+
+The `side_effect` can also be any iterable object. Repeated calls to the mock
+will return values from the iterable (until the iterable is exhausted and
+a `StopIteration` is raised):
+
+ >>> m = MagicMock(side_effect=[1, 2, 3])
+ >>> m()
+ 1
+ >>> m()
+ 2
+ >>> m()
+ 3
+ >>> m()
+ Traceback (most recent call last):
+ ...
+ StopIteration
+
+If any members of the iterable are exceptions they will be raised instead of
+returned::
+
+ >>> iterable = (33, ValueError, 66)
+ >>> m = MagicMock(side_effect=iterable)
+ >>> m()
+ 33
+ >>> m()
+ Traceback (most recent call last):
+ ...
+ ValueError
+ >>> m()
+ 66
+
+
+.. _deleting-attributes:
+
+Deleting Attributes
+~~~~~~~~~~~~~~~~~~~
+
+Mock objects create attributes on demand. This allows them to pretend to be
+objects of any type.
+
+You may want a mock object to return `False` to a `hasattr` call, or raise an
+`AttributeError` when an attribute is fetched. You can do this by providing
+an object as a `spec` for a mock, but that isn't always convenient.
+
+You "block" attributes by deleting them. Once deleted, accessing an attribute
+will raise an `AttributeError`.
+
+ >>> mock = MagicMock()
+ >>> hasattr(mock, 'm')
+ True
+ >>> del mock.m
+ >>> hasattr(mock, 'm')
+ False
+ >>> del mock.f
+ >>> mock.f
+ Traceback (most recent call last):
+ ...
+ AttributeError: f
+
+
+Attaching Mocks as Attributes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When you attach a mock as an attribute of another mock (or as the return
+value) it becomes a "child" of that mock. Calls to the child are recorded in
+the :attr:`~Mock.method_calls` and :attr:`~Mock.mock_calls` attributes of the
+parent. This is useful for configuring child mocks and then attaching them to
+the parent, or for attaching mocks to a parent that records all calls to the
+children and allows you to make assertions about the order of calls between
+mocks:
+
+ >>> parent = MagicMock()
+ >>> child1 = MagicMock(return_value=None)
+ >>> child2 = MagicMock(return_value=None)
+ >>> parent.child1 = child1
+ >>> parent.child2 = child2
+ >>> child1(1)
+ >>> child2(2)
+ >>> parent.mock_calls
+ [call.child1(1), call.child2(2)]
+
+The exception to this is if the mock has a name. This allows you to prevent
+the "parenting" if for some reason you don't want it to happen.
+
+ >>> mock = MagicMock()
+ >>> not_a_child = MagicMock(name='not-a-child')
+ >>> mock.attribute = not_a_child
+ >>> mock.attribute()
+ <MagicMock name='not-a-child()' id='...'>
+ >>> mock.mock_calls
+ []
+
+Mocks created for you by :func:`patch` are automatically given names. To
+attach mocks that have names to a parent you use the :meth:`~Mock.attach_mock`
+method:
+
+ >>> thing1 = object()
+ >>> thing2 = object()
+ >>> parent = MagicMock()
+ >>> with patch('__main__.thing1', return_value=None) as child1:
+ ... with patch('__main__.thing2', return_value=None) as child2:
+ ... parent.attach_mock(child1, 'child1')
+ ... parent.attach_mock(child2, 'child2')
+ ... child1('one')
+ ... child2('two')
+ ...
+ >>> parent.mock_calls
+ [call.child1('one'), call.child2('two')]
+
+
+.. [#] The only exceptions are magic methods and attributes (those that have
+ leading and trailing double underscores). Mock doesn't create these but
+ instead raises an ``AttributeError``. This is because the interpreter
+ will often implicitly request these methods, and gets *very* confused to
+ get a new Mock object when it expects a magic method. If you need magic
+ method support see :ref:`magic methods <magic-methods>`.
+
+
+The patchers
+============
+
+The patch decorators are used for patching objects only within the scope of
+the function they decorate. They automatically handle the unpatching for you,
+even if exceptions are raised. All of these functions can also be used in with
+statements or as class decorators.
+
+
+patch
+-----
+
+.. note::
+
+ `patch` is straightforward to use. The key is to do the patching in the
+ right namespace. See the section `where to patch`_.
+
+.. function:: patch(target, new=DEFAULT, spec=None, create=False, spec_set=None, autospec=None, new_callable=None, **kwargs)
+
+ `patch` acts as a function decorator, class decorator or a context
+ manager. Inside the body of the function or with statement, the `target`
+ is patched with a `new` object. When the function/with statement exits
+ the patch is undone.
+
+ If `new` is omitted, then the target is replaced with a
+ :class:`MagicMock`. If `patch` is used as a decorator and `new` is
+ omitted, the created mock is passed in as an extra argument to the
+ decorated function. If `patch` is used as a context manager the created
+ mock is returned by the context manager.
+
+ `target` should be a string in the form `'package.module.ClassName'`. The
+ `target` is imported and the specified object replaced with the `new`
+ object, so the `target` must be importable from the environment you are
+ calling `patch` from. The target is imported when the decorated function
+ is executed, not at decoration time.
+
+ The `spec` and `spec_set` keyword arguments are passed to the `MagicMock`
+ if patch is creating one for you.
+
+ In addition you can pass `spec=True` or `spec_set=True`, which causes
+ patch to pass in the object being mocked as the spec/spec_set object.
+
+ `new_callable` allows you to specify a different class, or callable object,
+ that will be called to create the `new` object. By default `MagicMock` is
+ used.
+
+ A more powerful form of `spec` is `autospec`. If you set `autospec=True`
+ then the mock will be created with a spec from the object being replaced.
+ All attributes of the mock will also have the spec of the corresponding
+ attribute of the object being replaced. Methods and functions being mocked
+ will have their arguments checked and will raise a `TypeError` if they are
+ called with the wrong signature. For mocks
+ replacing a class, their return value (the 'instance') will have the same
+ spec as the class. See the :func:`create_autospec` function and
+ :ref:`auto-speccing`.
+
+ Instead of `autospec=True` you can pass `autospec=some_object` to use an
+ arbitrary object as the spec instead of the one being replaced.
+
+ By default `patch` will fail to replace attributes that don't exist. If
+ you pass in `create=True`, and the attribute doesn't exist, patch will
+ create the attribute for you when the patched function is called, and
+ delete it again afterwards. This is useful for writing tests against
+ attributes that your production code creates at runtime. It is off by
+ default because it can be dangerous. With it switched on you can write
+ passing tests against APIs that don't actually exist!
+
+ Patch can be used as a `TestCase` class decorator. It works by
+ decorating each test method in the class. This reduces the boilerplate
+ code when your test methods share a common set of patches. `patch` finds
+ tests by looking for method names that start with `patch.TEST_PREFIX`.
+ By default this is `test`, which matches the way `unittest` finds tests.
+ You can specify an alternative prefix by setting `patch.TEST_PREFIX`.
+
+ Patch can be used as a context manager, with the with statement. Here the
+ patching applies to the indented block after the with statement. If you
+ use "as" then the patched object will be bound to the name after the
+ "as"; very useful if `patch` is creating a mock object for you.
+
+ `patch` takes arbitrary keyword arguments. These will be passed to
+ the `Mock` (or `new_callable`) on construction.
+
+ `patch.dict(...)`, `patch.multiple(...)` and `patch.object(...)` are
+ available for alternate use-cases.
+
+`patch` as function decorator, creating the mock for you and passing it into
+the decorated function:
+
+ >>> @patch('__main__.SomeClass')
+ ... def function(normal_argument, mock_class):
+ ... print(mock_class is SomeClass)
+ ...
+ >>> function(None)
+ True
+
+Patching a class replaces the class with a `MagicMock` *instance*. If the
+class is instantiated in the code under test then it will be the
+:attr:`~Mock.return_value` of the mock that will be used.
+
+If the class is instantiated multiple times you could use
+:attr:`~Mock.side_effect` to return a new mock each time. Alternatively you
+can set the `return_value` to be anything you want.
+
+To configure return values on methods of *instances* on the patched class
+you must do this on the `return_value`. For example:
+
+ >>> class Class(object):
+ ... def method(self):
+ ... pass
+ ...
+ >>> with patch('__main__.Class') as MockClass:
+ ... instance = MockClass.return_value
+ ... instance.method.return_value = 'foo'
+ ... assert Class() is instance
+ ... assert Class().method() == 'foo'
+ ...
+
+If you use `spec` or `spec_set` and `patch` is replacing a *class*, then the
+return value of the created mock will have the same spec.
+
+ >>> Original = Class
+ >>> patcher = patch('__main__.Class', spec=True)
+ >>> MockClass = patcher.start()
+ >>> instance = MockClass()
+ >>> assert isinstance(instance, Original)
+ >>> patcher.stop()
+
+The `new_callable` argument is useful where you want to use an alternative
+class to the default :class:`MagicMock` for the created mock. For example, if
+you wanted a :class:`NonCallableMock` to be used:
+
+ >>> thing = object()
+ >>> with patch('__main__.thing', new_callable=NonCallableMock) as mock_thing:
+ ... assert thing is mock_thing
+ ... thing()
+ ...
+ Traceback (most recent call last):
+ ...
+ TypeError: 'NonCallableMock' object is not callable
+
+Another use case might be to replace an object with a `StringIO` instance:
+
+ >>> from io import StringIO
+ >>> def foo():
+ ... print('Something')
+ ...
+ >>> @patch('sys.stdout', new_callable=StringIO)
+ ... def test(mock_stdout):
+ ... foo()
+ ... assert mock_stdout.getvalue() == 'Something\n'
+ ...
+ >>> test()
+
+When `patch` is creating a mock for you, it is common that the first thing
+you need to do is to configure the mock. Some of that configuration can be done
+in the call to patch. Any arbitrary keywords you pass into the call will be
+used to set attributes on the created mock:
+
+ >>> patcher = patch('__main__.thing', first='one', second='two')
+ >>> mock_thing = patcher.start()
+ >>> mock_thing.first
+ 'one'
+ >>> mock_thing.second
+ 'two'
+
+As well as attributes on the created mock, attributes of child mocks, like the
+:attr:`~Mock.return_value` and :attr:`~Mock.side_effect`, can
+also be configured. These aren't syntactically valid to pass in directly as
+keyword arguments, but a dictionary with these as keys can still be expanded
+into a `patch` call using `**`:
+
+ >>> config = {'method.return_value': 3, 'other.side_effect': KeyError}
+ >>> patcher = patch('__main__.thing', **config)
+ >>> mock_thing = patcher.start()
+ >>> mock_thing.method()
+ 3
+ >>> mock_thing.other()
+ Traceback (most recent call last):
+ ...
+ KeyError
+
+
+patch.object
+------------
+
+.. function:: patch.object(target, attribute, new=DEFAULT, spec=None, create=False, spec_set=None, autospec=None, new_callable=None, **kwargs)
+
+ Patch the named member (`attribute`) on an object (`target`) with a mock
+ object.
+
+ `patch.object` can be used as a decorator, class decorator or a context
+ manager. Arguments `new`, `spec`, `create`, `spec_set`, `autospec` and
+ `new_callable` have the same meaning as for `patch`. Like `patch`,
+ `patch.object` takes arbitrary keyword arguments for configuring the mock
+ object it creates.
+
+ When used as a class decorator `patch.object` honours `patch.TEST_PREFIX`
+ for choosing which methods to wrap.
+
+You can either call `patch.object` with three arguments or two arguments. The
+three argument form takes the object to be patched, the attribute name and the
+object to replace the attribute with.
+
+When calling with the two argument form you omit the replacement object, and a
+mock is created for you and passed in as an extra argument to the decorated
+function:
+
+ >>> @patch.object(SomeClass, 'class_method')
+ ... def test(mock_method):
+ ... SomeClass.class_method(3)
+ ... mock_method.assert_called_with(3)
+ ...
+ >>> test()
+
+`spec`, `create` and the other arguments to `patch.object` have the same
+meaning as they do for `patch`.
+
+
+patch.dict
+----------
+
+.. function:: patch.dict(in_dict, values=(), clear=False, **kwargs)
+
+ Patch a dictionary, or dictionary like object, and restore the dictionary
+ to its original state after the test.
+
+ `in_dict` can be a dictionary or a mapping like container. If it is a
+ mapping then it must at least support getting, setting and deleting items
+ plus iterating over keys.
+
+ `in_dict` can also be a string specifying the name of the dictionary, which
+ will then be fetched by importing it.
+
+ `values` can be a dictionary of values to set in the dictionary. `values`
+ can also be an iterable of `(key, value)` pairs.
+
+ If `clear` is True then the dictionary will be cleared before the new
+ values are set.
+
+ `patch.dict` can also be called with arbitrary keyword arguments to set
+ values in the dictionary.
+
+ `patch.dict` can be used as a context manager, decorator or class
+ decorator. When used as a class decorator `patch.dict` honours
+ `patch.TEST_PREFIX` for choosing which methods to wrap.
+
+`patch.dict` can be used to add members to a dictionary, or simply let a test
+change a dictionary, and ensure the dictionary is restored when the test
+ends.
+
+ >>> foo = {}
+ >>> with patch.dict(foo, {'newkey': 'newvalue'}):
+ ... assert foo == {'newkey': 'newvalue'}
+ ...
+ >>> assert foo == {}
+
+ >>> import os
+ >>> with patch.dict('os.environ', {'newkey': 'newvalue'}):
+ ... print(os.environ['newkey'])
+ ...
+ newvalue
+ >>> assert 'newkey' not in os.environ
+
+Keywords can be used in the `patch.dict` call to set values in the dictionary:
+
+ >>> mymodule = MagicMock()
+ >>> mymodule.function.return_value = 'fish'
+ >>> with patch.dict('sys.modules', mymodule=mymodule):
+ ... import mymodule
+ ... mymodule.function('some', 'args')
+ ...
+ 'fish'
+
+`patch.dict` can be used with dictionary like objects that aren't actually
+dictionaries. At the very minimum they must support item getting, setting,
+deleting and either iteration or membership test. This corresponds to the
+magic methods `__getitem__`, `__setitem__`, `__delitem__` and either
+`__iter__` or `__contains__`.
+
+ >>> class Container(object):
+ ... def __init__(self):
+ ... self.values = {}
+ ... def __getitem__(self, name):
+ ... return self.values[name]
+ ... def __setitem__(self, name, value):
+ ... self.values[name] = value
+ ... def __delitem__(self, name):
+ ... del self.values[name]
+ ... def __iter__(self):
+ ... return iter(self.values)
+ ...
+ >>> thing = Container()
+ >>> thing['one'] = 1
+ >>> with patch.dict(thing, one=2, two=3):
+ ... assert thing['one'] == 2
+ ... assert thing['two'] == 3
+ ...
+ >>> assert thing['one'] == 1
+ >>> assert list(thing) == ['one']
+
+
+patch.multiple
+--------------
+
+.. function:: patch.multiple(target, spec=None, create=False, spec_set=None, autospec=None, new_callable=None, **kwargs)
+
+ Perform multiple patches in a single call. It takes the object to be
+ patched (either as an object or a string to fetch the object by importing)
+ and keyword arguments for the patches::
+
+ with patch.multiple(settings, FIRST_PATCH='one', SECOND_PATCH='two'):
+ ...
+
+ Use :data:`DEFAULT` as the value if you want `patch.multiple` to create
+ mocks for you. In this case the created mocks are passed into a decorated
+ function by keyword, and a dictionary is returned when `patch.multiple` is
+ used as a context manager.
+
+ `patch.multiple` can be used as a decorator, class decorator or a context
+ manager. The arguments `spec`, `spec_set`, `create`, `autospec` and
+ `new_callable` have the same meaning as for `patch`. These arguments will
+ be applied to *all* patches done by `patch.multiple`.
+
+ When used as a class decorator `patch.multiple` honours `patch.TEST_PREFIX`
+ for choosing which methods to wrap.
+
+If you want `patch.multiple` to create mocks for you, then you can use
+:data:`DEFAULT` as the value. If you use `patch.multiple` as a decorator
+then the created mocks are passed into the decorated function by keyword.
+
+ >>> thing = object()
+ >>> other = object()
+
+ >>> @patch.multiple('__main__', thing=DEFAULT, other=DEFAULT)
+ ... def test_function(thing, other):
+ ... assert isinstance(thing, MagicMock)
+ ... assert isinstance(other, MagicMock)
+ ...
+ >>> test_function()
+
+`patch.multiple` can be nested with other `patch` decorators, but put arguments
+passed by keyword *after* any of the standard arguments created by `patch`:
+
+ >>> @patch('sys.exit')
+ ... @patch.multiple('__main__', thing=DEFAULT, other=DEFAULT)
+ ... def test_function(mock_exit, other, thing):
+ ... assert 'other' in repr(other)
+ ... assert 'thing' in repr(thing)
+ ... assert 'exit' in repr(mock_exit)
+ ...
+ >>> test_function()
+
+If `patch.multiple` is used as a context manager, the value returned by the
+context manager is a dictionary where created mocks are keyed by name:
+
+ >>> with patch.multiple('__main__', thing=DEFAULT, other=DEFAULT) as values:
+ ... assert 'other' in repr(values['other'])
+ ... assert 'thing' in repr(values['thing'])
+ ... assert values['thing'] is thing
+ ... assert values['other'] is other
+ ...
+
+
+.. _start-and-stop:
+
+patch methods: start and stop
+-----------------------------
+
+All the patchers have `start` and `stop` methods. These make it simpler to do
+patching in `setUp` methods or where you want to do multiple patches without
+nesting decorators or with statements.
+
+To use them call `patch`, `patch.object` or `patch.dict` as normal and keep a
+reference to the returned `patcher` object. You can then call `start` to put
+the patch in place and `stop` to undo it.
+
+If you are using `patch` to create a mock for you then it will be returned by
+the call to `patcher.start`.
+
+ >>> patcher = patch('package.module.ClassName')
+ >>> from package import module
+ >>> original = module.ClassName
+ >>> new_mock = patcher.start()
+ >>> assert module.ClassName is not original
+ >>> assert module.ClassName is new_mock
+ >>> patcher.stop()
+ >>> assert module.ClassName is original
+ >>> assert module.ClassName is not new_mock
+
+
+A typical use case for this might be for doing multiple patches in the `setUp`
+method of a `TestCase`:
+
+ >>> class MyTest(TestCase):
+ ... def setUp(self):
+ ... self.patcher1 = patch('package.module.Class1')
+ ... self.patcher2 = patch('package.module.Class2')
+ ... self.MockClass1 = self.patcher1.start()
+ ... self.MockClass2 = self.patcher2.start()
+ ...
+ ... def tearDown(self):
+ ... self.patcher1.stop()
+ ... self.patcher2.stop()
+ ...
+ ... def test_something(self):
+ ... assert package.module.Class1 is self.MockClass1
+ ... assert package.module.Class2 is self.MockClass2
+ ...
+ >>> MyTest('test_something').run()
+
+.. caution::
+
+ If you use this technique you must ensure that the patching is "undone" by
+ calling `stop`. This can be fiddlier than you might think, because if an
+ exception is raised in the ``setUp`` then ``tearDown`` is not called.
+ :meth:`unittest.TestCase.addCleanup` makes this easier:
+
+ >>> class MyTest(TestCase):
+ ... def setUp(self):
+ ... patcher = patch('package.module.Class')
+ ... self.MockClass = patcher.start()
+ ... self.addCleanup(patcher.stop)
+ ...
+ ... def test_something(self):
+ ... assert package.module.Class is self.MockClass
+ ...
+
+ As an added bonus you no longer need to keep a reference to the `patcher`
+ object.
+
+It is also possible to stop all patches which have been started by using
+`patch.stopall`.
+
+.. function:: patch.stopall
+
+ Stop all active patches. Only stops patches started with `start`.
+
+
+TEST_PREFIX
+-----------
+
+All of the patchers can be used as class decorators. When used in this way
+they wrap every test method on the class. The patchers recognise methods that
+start with `test` as being test methods. This is the same way that the
+:class:`unittest.TestLoader` finds test methods by default.
+
+It is possible that you want to use a different prefix for your tests. You can
+inform the patchers of the different prefix by setting `patch.TEST_PREFIX`:
+
+ >>> patch.TEST_PREFIX = 'foo'
+ >>> value = 3
+ >>>
+ >>> @patch('__main__.value', 'not three')
+ ... class Thing(object):
+ ... def foo_one(self):
+ ... print(value)
+ ... def foo_two(self):
+ ... print(value)
+ ...
+ >>>
+ >>> Thing().foo_one()
+ not three
+ >>> Thing().foo_two()
+ not three
+ >>> value
+ 3
+
+
+Nesting Patch Decorators
+------------------------
+
+If you want to perform multiple patches then you can simply stack up the
+decorators.
+
+You can stack up multiple patch decorators using this pattern:
+
+ >>> @patch.object(SomeClass, 'class_method')
+ ... @patch.object(SomeClass, 'static_method')
+ ... def test(mock1, mock2):
+ ... assert SomeClass.static_method is mock1
+ ... assert SomeClass.class_method is mock2
+ ... SomeClass.static_method('foo')
+ ... SomeClass.class_method('bar')
+ ... return mock1, mock2
+ ...
+ >>> mock1, mock2 = test()
+ >>> mock1.assert_called_once_with('foo')
+ >>> mock2.assert_called_once_with('bar')
+
+
+Note that the decorators are applied from the bottom upwards. This is the
+standard way that Python applies decorators. The order of the created mocks
+passed into your test function matches this order.
+
+
+.. _where-to-patch:
+
+Where to patch
+--------------
+
+`patch` works by (temporarily) changing the object that a *name* points to with
+another one. There can be many names pointing to any individual object, so
+for patching to work you must ensure that you patch the name used by the system
+under test.
+
+The basic principle is that you patch where an object is *looked up*, which
+is not necessarily the same place as where it is defined. A couple of
+examples will help to clarify this.
+
+Imagine we have a project that we want to test with the following structure::
+
+ a.py
+ -> Defines SomeClass
+
+ b.py
+ -> from a import SomeClass
+ -> some_function instantiates SomeClass
+
+Now we want to test `some_function` but we want to mock out `SomeClass` using
+`patch`. The problem is that when we import module b, which we will have to
+do, then it imports `SomeClass` from module a. If we use `patch` to mock out
+`a.SomeClass` then it will have no effect on our test; module b already has a
+reference to the *real* `SomeClass` and it looks like our patching had no
+effect.
+
+The key is to patch out `SomeClass` where it is used (or where it is looked up).
+In this case `some_function` will actually look up `SomeClass` in module b,
+where we have imported it. The patching should look like::
+
+ @patch('b.SomeClass')
+
+However, consider the alternative scenario where instead of `from a import
+SomeClass` module b does `import a` and `some_function` uses `a.SomeClass`. Both
+of these import forms are common. In this case the class we want to patch is
+being looked up on the a module and so we have to patch `a.SomeClass` instead::
+
+ @patch('a.SomeClass')
+
+
+Patching Descriptors and Proxy Objects
+--------------------------------------
+
+Both patch_ and patch.object_ correctly patch and restore descriptors: class
+methods, static methods and properties. You should patch these on the *class*
+rather than an instance. They also work with *some* objects
+that proxy attribute access, like the `django settings object
+<http://www.voidspace.org.uk/python/weblog/arch_d7_2010_12_04.shtml#e1198>`_.
+
+
+MagicMock and magic method support
+==================================
+
+.. _magic-methods:
+
+Mocking Magic Methods
+---------------------
+
+:class:`Mock` supports mocking the Python protocol methods, also known as
+"magic methods". This allows mock objects to replace containers or other
+objects that implement Python protocols.
+
+Because magic methods are looked up differently from normal methods [#]_, this
+support has been specially implemented. This means that only specific magic
+methods are supported. The supported list includes *almost* all of them. If
+there are any missing that you need please let us know.
+
+You mock magic methods by setting the method you are interested in to a function
+or a mock instance. If you are using a function then it *must* take ``self`` as
+the first argument [#]_.
+
+ >>> def __str__(self):
+ ... return 'fooble'
+ ...
+ >>> mock = Mock()
+ >>> mock.__str__ = __str__
+ >>> str(mock)
+ 'fooble'
+
+ >>> mock = Mock()
+ >>> mock.__str__ = Mock()
+ >>> mock.__str__.return_value = 'fooble'
+ >>> str(mock)
+ 'fooble'
+
+ >>> mock = Mock()
+ >>> mock.__iter__ = Mock(return_value=iter([]))
+ >>> list(mock)
+ []
+
+One use case for this is for mocking objects used as context managers in a
+`with` statement:
+
+ >>> mock = Mock()
+ >>> mock.__enter__ = Mock(return_value='foo')
+ >>> mock.__exit__ = Mock(return_value=False)
+ >>> with mock as m:
+ ... assert m == 'foo'
+ ...
+ >>> mock.__enter__.assert_called_with()
+ >>> mock.__exit__.assert_called_with(None, None, None)
+
+Calls to magic methods do not appear in :attr:`~Mock.method_calls`, but they
+are recorded in :attr:`~Mock.mock_calls`.
+
+.. note::
+
+ If you use the `spec` keyword argument to create a mock then attempting to
+ set a magic method that isn't in the spec will raise an `AttributeError`.
+
+The full list of supported magic methods is:
+
+* ``__hash__``, ``__sizeof__``, ``__repr__`` and ``__str__``
+* ``__dir__``, ``__format__`` and ``__subclasses__``
+* ``__floor__``, ``__trunc__`` and ``__ceil__``
+* Comparisons: ``__cmp__``, ``__lt__``, ``__gt__``, ``__le__``, ``__ge__``,
+ ``__eq__`` and ``__ne__``
+* Container methods: ``__getitem__``, ``__setitem__``, ``__delitem__``,
+ ``__contains__``, ``__len__``, ``__iter__``, ``__getslice__``,
+ ``__setslice__``, ``__reversed__`` and ``__missing__``
+* Context manager: ``__enter__`` and ``__exit__``
+* Unary numeric methods: ``__neg__``, ``__pos__`` and ``__invert__``
+* The numeric methods (including right hand and in-place variants):
+ ``__add__``, ``__sub__``, ``__mul__``, ``__div__``,
+ ``__floordiv__``, ``__mod__``, ``__divmod__``, ``__lshift__``,
+ ``__rshift__``, ``__and__``, ``__xor__``, ``__or__``, and ``__pow__``
+* Numeric conversion methods: ``__complex__``, ``__int__``, ``__float__``,
+ ``__index__`` and ``__coerce__``
+* Descriptor methods: ``__get__``, ``__set__`` and ``__delete__``
+* Pickling: ``__reduce__``, ``__reduce_ex__``, ``__getinitargs__``,
+ ``__getnewargs__``, ``__getstate__`` and ``__setstate__``
+
+
+The following methods exist but are *not* supported as they are either in use
+by mock, can't be set dynamically, or can cause problems:
+
+* ``__getattr__``, ``__setattr__``, ``__init__`` and ``__new__``
+* ``__prepare__``, ``__instancecheck__``, ``__subclasscheck__``, ``__del__``
+
+
+
+Magic Mock
+----------
+
+There are two `MagicMock` variants: `MagicMock` and `NonCallableMagicMock`.
+
+
+.. class:: MagicMock(*args, **kw)
+
+ ``MagicMock`` is a subclass of :class:`Mock` with default implementations
+ of most of the magic methods. You can use ``MagicMock`` without having to
+ configure the magic methods yourself.
+
+ The constructor parameters have the same meaning as for :class:`Mock`.
+
+ If you use the `spec` or `spec_set` arguments then *only* magic methods
+ that exist in the spec will be created.
+
+
+.. class:: NonCallableMagicMock(*args, **kw)
+
+ A non-callable version of `MagicMock`.
+
+ The constructor parameters have the same meaning as for
+ :class:`MagicMock`, with the exception of `return_value` and
+ `side_effect` which have no meaning on a non-callable mock.
+
+The magic methods are set up with `MagicMock` objects, so you can configure them
+and use them in the usual way:
+
+ >>> mock = MagicMock()
+ >>> mock[3] = 'fish'
+ >>> mock.__setitem__.assert_called_with(3, 'fish')
+ >>> mock.__getitem__.return_value = 'result'
+ >>> mock[2]
+ 'result'
+
+By default many of the protocol methods are required to return objects of a
+specific type. These methods are preconfigured with a default return value, so
+that they can be used without you having to do anything if you aren't interested
+in the return value. You can still *set* the return value manually if you want
+to change the default.
+
+Methods and their defaults:
+
+* ``__lt__``: NotImplemented
+* ``__gt__``: NotImplemented
+* ``__le__``: NotImplemented
+* ``__ge__``: NotImplemented
+* ``__int__`` : 1
+* ``__contains__`` : False
+* ``__len__`` : 0
+* ``__iter__`` : iter([])
+* ``__exit__`` : False
+* ``__complex__`` : 1j
+* ``__float__`` : 1.0
+* ``__bool__`` : True
+* ``__index__`` : 1
+* ``__hash__`` : default hash for the mock
+* ``__str__`` : default str for the mock
+* ``__sizeof__``: default sizeof for the mock
+
+For example:
+
+ >>> mock = MagicMock()
+ >>> int(mock)
+ 1
+ >>> len(mock)
+ 0
+ >>> list(mock)
+ []
+ >>> object() in mock
+ False
+
+The two equality methods, `__eq__` and `__ne__`, are special.
+They do the default equality comparison on identity, using a side
+effect, unless you change their return value to return something else:
+
+ >>> MagicMock() == 3
+ False
+ >>> MagicMock() != 3
+ True
+ >>> mock = MagicMock()
+ >>> mock.__eq__.return_value = True
+ >>> mock == 3
+ True
+
+The return value of `MagicMock.__iter__` can be any iterable object and isn't
+required to be an iterator:
+
+ >>> mock = MagicMock()
+ >>> mock.__iter__.return_value = ['a', 'b', 'c']
+ >>> list(mock)
+ ['a', 'b', 'c']
+ >>> list(mock)
+ ['a', 'b', 'c']
+
+If the return value *is* an iterator, then iterating over it once will consume
+it and subsequent iterations will result in an empty list:
+
+ >>> mock.__iter__.return_value = iter(['a', 'b', 'c'])
+ >>> list(mock)
+ ['a', 'b', 'c']
+ >>> list(mock)
+ []
+
+``MagicMock`` has all of the supported magic methods configured except for some
+of the obscure and obsolete ones. You can still set these up if you want.
+
+Magic methods that are supported but not set up by default in ``MagicMock`` are:
+
+* ``__subclasses__``
+* ``__dir__``
+* ``__format__``
+* ``__get__``, ``__set__`` and ``__delete__``
+* ``__reversed__`` and ``__missing__``
+* ``__reduce__``, ``__reduce_ex__``, ``__getinitargs__``, ``__getnewargs__``,
+ ``__getstate__`` and ``__setstate__``
+* ``__getformat__`` and ``__setformat__``
+
+
+
+.. [#] Magic methods *should* be looked up on the class rather than the
+ instance. Different versions of Python are inconsistent about applying this
+ rule. The supported protocol methods should work with all supported versions
+ of Python.
+.. [#] The function is basically hooked up to the class, but each ``Mock``
+ instance is kept isolated from the others.
+
+
+Helpers
+=======
+
+sentinel
+--------
+
+.. data:: sentinel
+
+ The ``sentinel`` object provides a convenient way of providing unique
+ objects for your tests.
+
+ Attributes are created on demand when you access them by name. Accessing
+ the same attribute will always return the same object. The objects
+ returned have a sensible repr so that test failure messages are readable.
+
+Sometimes when testing you need to test that a specific object is passed as an
+argument to another method, or returned. It can be common to create named
+sentinel objects to test this. `sentinel` provides a convenient way of
+creating and testing the identity of objects like this.
+
+In this example we monkey patch `method` to return `sentinel.some_object`:
+
+ >>> real = ProductionClass()
+ >>> real.method = Mock(name="method")
+ >>> real.method.return_value = sentinel.some_object
+ >>> result = real.method()
+ >>> assert result is sentinel.some_object
+ >>> sentinel.some_object
+ sentinel.some_object
+
+
+DEFAULT
+-------
+
+
+.. data:: DEFAULT
+
+ The `DEFAULT` object is a pre-created sentinel (actually
+ `sentinel.DEFAULT`). It can be used by :attr:`~Mock.side_effect`
+ functions to indicate that the normal return value should be used.
+
+
+
+call
+----
+
+.. function:: call(*args, **kwargs)
+
+ `call` is a helper object for making simpler assertions, for comparing with
+ :attr:`~Mock.call_args`, :attr:`~Mock.call_args_list`,
+ :attr:`~Mock.mock_calls` and :attr:`~Mock.method_calls`. `call` can also be
+ used with :meth:`~Mock.assert_has_calls`.
+
+ >>> m = MagicMock(return_value=None)
+ >>> m(1, 2, a='foo', b='bar')
+ >>> m()
+ >>> m.call_args_list == [call(1, 2, a='foo', b='bar'), call()]
+ True
+
+.. method:: call.call_list()
+
+ For a call object that represents multiple calls, `call_list`
+ returns a list of all the intermediate calls as well as the
+ final call.
+
+`call_list` is particularly useful for making assertions on "chained calls". A
+chained call is multiple calls on a single line of code. This results in
+multiple entries in :attr:`~Mock.mock_calls` on a mock. Manually constructing
+the sequence of calls can be tedious.
+
+:meth:`~call.call_list` can construct the sequence of calls from the same
+chained call:
+
+ >>> m = MagicMock()
+ >>> m(1).method(arg='foo').other('bar')(2.0)
+ <MagicMock name='mock().method().other()()' id='...'>
+ >>> kall = call(1).method(arg='foo').other('bar')(2.0)
+ >>> kall.call_list()
+ [call(1),
+ call().method(arg='foo'),
+ call().method().other('bar'),
+ call().method().other()(2.0)]
+ >>> m.mock_calls == kall.call_list()
+ True
+
+.. _calls-as-tuples:
+
+A `call` object is either a tuple of (positional args, keyword args) or
+(name, positional args, keyword args) depending on how it was constructed. When
+you construct them yourself this isn't particularly interesting, but the `call`
+objects that are in the :attr:`Mock.call_args`, :attr:`Mock.call_args_list` and
+:attr:`Mock.mock_calls` attributes can be introspected to get at the individual
+arguments they contain.
+
+The `call` objects in :attr:`Mock.call_args` and :attr:`Mock.call_args_list`
+are two-tuples of (positional args, keyword args) whereas the `call` objects
+in :attr:`Mock.mock_calls`, along with ones you construct yourself, are
+three-tuples of (name, positional args, keyword args).
+
+You can use their "tupleness" to pull out the individual arguments for more
+complex introspection and assertions. The positional arguments are a tuple
+(an empty tuple if there are no positional arguments) and the keyword
+arguments are a dictionary:
+
+ >>> m = MagicMock(return_value=None)
+ >>> m(1, 2, 3, arg='one', arg2='two')
+ >>> kall = m.call_args
+ >>> args, kwargs = kall
+ >>> args
+ (1, 2, 3)
+ >>> kwargs
+ {'arg2': 'two', 'arg': 'one'}
+ >>> args is kall[0]
+ True
+ >>> kwargs is kall[1]
+ True
+
+ >>> m = MagicMock()
+ >>> m.foo(4, 5, 6, arg='two', arg2='three')
+ <MagicMock name='mock.foo()' id='...'>
+ >>> kall = m.mock_calls[0]
+ >>> name, args, kwargs = kall
+ >>> name
+ 'foo'
+ >>> args
+ (4, 5, 6)
+ >>> kwargs
+ {'arg2': 'three', 'arg': 'two'}
+ >>> name is m.mock_calls[0][0]
+ True
+
+
+create_autospec
+---------------
+
+.. function:: create_autospec(spec, spec_set=False, instance=False, **kwargs)
+
+ Create a mock object using another object as a spec. Attributes on the
+ mock will use the corresponding attribute on the `spec` object as their
+ spec.
+
+ Functions or methods being mocked will have their arguments checked to
+ ensure that they are called with the correct signature.
+
+ If `spec_set` is `True` then attempting to set attributes that don't exist
+ on the spec object will raise an `AttributeError`.
+
+ If a class is used as a spec then the return value of the mock (the
+ instance of the class) will have the same spec. You can use a class as the
+ spec for an instance object by passing `instance=True`. The returned mock
+ will only be callable if instances of the mock are callable.
+
+ `create_autospec` also takes arbitrary keyword arguments that are passed to
+ the constructor of the created mock.
+
+See :ref:`auto-speccing` for examples of how to use auto-speccing with
+`create_autospec` and the `autospec` argument to :func:`patch`.
+
+
+ANY
+---
+
+.. data:: ANY
+
+Sometimes you may need to make assertions about *some* of the arguments in a
+call to mock, but either not care about some of the arguments or want to pull
+them individually out of :attr:`~Mock.call_args` and make more complex
+assertions on them.
+
+To ignore certain arguments you can pass in objects that compare equal to
+*everything*. Calls to :meth:`~Mock.assert_called_with` and
+:meth:`~Mock.assert_called_once_with` will then succeed no matter what was
+passed in.
+
+ >>> mock = Mock(return_value=None)
+ >>> mock('foo', bar=object())
+ >>> mock.assert_called_once_with('foo', bar=ANY)
+
+`ANY` can also be used in comparisons with call lists like
+:attr:`~Mock.mock_calls`:
+
+ >>> m = MagicMock(return_value=None)
+ >>> m(1)
+ >>> m(1, 2)
+ >>> m(object())
+ >>> m.mock_calls == [call(1), call(1, 2), ANY]
+ True
+
+
+
+FILTER_DIR
+----------
+
+.. data:: FILTER_DIR
+
+`FILTER_DIR` is a module level variable that controls the way mock objects
+respond to `dir` (only for Python 2.6 or more recent). The default is `True`,
+which uses the filtering described below, to only show useful members. If you
+dislike this filtering, or need to switch it off for diagnostic purposes, then
+set `mock.FILTER_DIR = False`.
+
+With filtering on, `dir(some_mock)` shows only useful attributes and will
+include any dynamically created attributes that wouldn't normally be shown.
+If the mock was created with a `spec` (or `autospec` of course) then all the
+attributes from the original are shown, even if they haven't been accessed
+yet:
+
+ >>> dir(Mock())
+ ['assert_any_call',
+ 'assert_called_once_with',
+ 'assert_called_with',
+ 'assert_has_calls',
+ 'attach_mock',
+ ...
+ >>> from urllib import request
+ >>> dir(Mock(spec=request))
+ ['AbstractBasicAuthHandler',
+ 'AbstractDigestAuthHandler',
+ 'AbstractHTTPHandler',
+ 'BaseHandler',
+ ...
+
+Many of the not-very-useful (private to `Mock` rather than the thing being
+mocked) underscore and double underscore prefixed attributes have been
+filtered from the result of calling `dir` on a `Mock`. If you dislike this
+behaviour you can switch it off by setting the module level switch
+`FILTER_DIR`:
+
+ >>> from unittest import mock
+ >>> mock.FILTER_DIR = False
+ >>> dir(mock.Mock())
+ ['_NonCallableMock__get_return_value',
+ '_NonCallableMock__get_side_effect',
+ '_NonCallableMock__return_value_doc',
+ '_NonCallableMock__set_return_value',
+ '_NonCallableMock__set_side_effect',
+ '__call__',
+ '__class__',
+ ...
+
+Alternatively you can just use `vars(my_mock)` (instance members) and
+`dir(type(my_mock))` (type members) to bypass the filtering irrespective of
+`mock.FILTER_DIR`.
+
+
+mock_open
+---------
+
+.. function:: mock_open(mock=None, read_data=None)
+
+ A helper function to create a mock to replace the use of `open`. It works
+ for `open` called directly or used as a context manager.
+
+ The `mock` argument is the mock object to configure. If `None` (the
+ default) then a `MagicMock` will be created for you, with the API limited
+ to methods or attributes available on standard file handles.
+
+ `read_data` is a string for the `read` method of the file handle to return.
+ This is an empty string by default.
+
+Using `open` as a context manager is a great way to ensure your file handles
+are closed properly and is becoming common::
+
+ with open('/some/path', 'w') as f:
+ f.write('something')
+
+The issue is that even if you mock out the call to `open` it is the
+*returned object* that is used as a context manager (and has `__enter__` and
+`__exit__` called).
+
+Mocking context managers with a :class:`MagicMock` is common enough and fiddly
+enough that a helper function is useful.
+
+ >>> m = mock_open()
+ >>> with patch('__main__.open', m, create=True):
+ ... with open('foo', 'w') as h:
+ ... h.write('some stuff')
+ ...
+ >>> m.mock_calls
+ [call('foo', 'w'),
+ call().__enter__(),
+ call().write('some stuff'),
+ call().__exit__(None, None, None)]
+ >>> m.assert_called_once_with('foo', 'w')
+ >>> handle = m()
+ >>> handle.write.assert_called_once_with('some stuff')
+
+And for reading files:
+
+ >>> with patch('__main__.open', mock_open(read_data='bibble'), create=True) as m:
+ ... with open('foo') as h:
+ ... result = h.read()
+ ...
+ >>> m.assert_called_once_with('foo')
+ >>> assert result == 'bibble'
+
+
+.. _auto-speccing:
+
+Autospeccing
+------------
+
+Autospeccing is based on the existing `spec` feature of mock. It limits the
+api of mocks to the api of an original object (the spec), but it is recursive
+(implemented lazily) so that attributes of mocks only have the same api as
+the attributes of the spec. In addition mocked functions / methods have the
+same call signature as the original so they raise a `TypeError` if they are
+called incorrectly.
+
+Before I explain how auto-speccing works, here's why it is needed.
+
+`Mock` is a very powerful and flexible object, but it suffers from two flaws
+when used to mock out objects from a system under test. One of these flaws is
+specific to the `Mock` api and the other is a more general problem with using
+mock objects.
+
+First the problem specific to `Mock`. `Mock` has two assert methods that are
+extremely handy: :meth:`~Mock.assert_called_with` and
+:meth:`~Mock.assert_called_once_with`.
+
+ >>> mock = Mock(name='Thing', return_value=None)
+ >>> mock(1, 2, 3)
+ >>> mock.assert_called_once_with(1, 2, 3)
+ >>> mock(1, 2, 3)
+ >>> mock.assert_called_once_with(1, 2, 3)
+ Traceback (most recent call last):
+ ...
+ AssertionError: Expected 'mock' to be called once. Called 2 times.
+
+Because mocks auto-create attributes on demand, and allow you to call them
+with arbitrary arguments, if you misspell one of these assert methods then
+your assertion is gone:
+
+.. code-block:: pycon
+
+ >>> mock = Mock(name='Thing', return_value=None)
+ >>> mock(1, 2, 3)
+ >>> mock.assret_called_once_with(4, 5, 6)
+
+Your tests can pass silently and incorrectly because of the typo.
+
+The second issue is more general to mocking. If you refactor some of your
+code, rename members and so on, any tests for code that is still using the
+*old api* but uses mocks instead of the real objects will still pass. This
+means your tests can all pass even though your code is broken.
+
+Note that this is another reason why you need integration tests as well as
+unit tests. Testing everything in isolation is all fine and dandy, but if you
+don't test how your units are "wired together" there is still lots of room
+for bugs that tests might have caught.
+
+`mock` already provides a feature to help with this, called speccing. If you
+use a class or instance as the `spec` for a mock then you can only access
+attributes on the mock that exist on the real class:
+
+ >>> from urllib import request
+ >>> mock = Mock(spec=request.Request)
+ >>> mock.assret_called_with
+ Traceback (most recent call last):
+ ...
+ AttributeError: Mock object has no attribute 'assret_called_with'
+
+The spec only applies to the mock itself, so we still have the same issue
+with any methods on the mock:
+
+.. code-block:: pycon
+
+ >>> mock.has_data()
+ <mock.Mock object at 0x...>
+ >>> mock.has_data.assret_called_with()
+
+Auto-speccing solves this problem. You can either pass `autospec=True` to
+`patch` / `patch.object` or use the `create_autospec` function to create a
+mock with a spec. If you use the `autospec=True` argument to `patch` then the
+object that is being replaced will be used as the spec object. Because the
+speccing is done "lazily" (the spec is created as attributes on the mock are
+accessed) you can use it with very complex or deeply nested objects (like
+modules that import modules that import modules) without a big performance
+hit.
+
+Here's an example of it in use:
+
+ >>> from urllib import request
+ >>> patcher = patch('__main__.request', autospec=True)
+ >>> mock_request = patcher.start()
+ >>> request is mock_request
+ True
+ >>> mock_request.Request
+ <MagicMock name='request.Request' spec='Request' id='...'>
+
+You can see that `request.Request` has a spec. `request.Request` takes two
+arguments in the constructor (one of which is `self`). Here's what happens if
+we try to call it incorrectly:
+
+ >>> req = request.Request()
+ Traceback (most recent call last):
+ ...
+ TypeError: <lambda>() takes at least 2 arguments (1 given)
+
+The spec also applies to instantiated classes (i.e. the return value of
+specced mocks):
+
+ >>> req = request.Request('foo')
+ >>> req
+ <NonCallableMagicMock name='request.Request()' spec='Request' id='...'>
+
+`Request` objects are not callable, so the return value of instantiating our
+mocked out `request.Request` is a non-callable mock. With the spec in place
+any typos in our asserts will raise the correct error:
+
+ >>> req.add_header('spam', 'eggs')
+ <MagicMock name='request.Request().add_header()' id='...'>
+ >>> req.add_header.assret_called_with
+ Traceback (most recent call last):
+ ...
+ AttributeError: Mock object has no attribute 'assret_called_with'
+ >>> req.add_header.assert_called_with('spam', 'eggs')
+
+In many cases you will just be able to add `autospec=True` to your existing
+`patch` calls and then be protected against bugs due to typos and api
+changes.
+
+As well as using `autospec` through `patch` there is a
+:func:`create_autospec` for creating autospecced mocks directly:
+
+ >>> from urllib import request
+ >>> mock_request = create_autospec(request)
+ >>> mock_request.Request('foo', 'bar')
+ <NonCallableMagicMock name='mock.Request()' spec='Request' id='...'>
+
+This isn't without caveats and limitations however, which is why it is not
+the default behaviour. In order to know what attributes are available on the
+spec object, autospec has to introspect (access attributes) the spec. As you
+traverse attributes on the mock a corresponding traversal of the original
+object is happening under the hood. If any of your specced objects have
+properties or descriptors that can trigger code execution then you may not be
+able to use autospec. On the other hand it is much better to design your
+objects so that introspection is safe [#]_.
+
+A more serious problem is that it is common for instance attributes to be
+created in the `__init__` method and not to exist on the class at all.
+`autospec` can't know about any dynamically created attributes and restricts
+the api to visible attributes.
+
+ >>> class Something(object):
+ ... def __init__(self):
+ ... self.a = 33
+ ...
+ >>> with patch('__main__.Something', autospec=True):
+ ... thing = Something()
+ ... thing.a
+ ...
+ Traceback (most recent call last):
+ ...
+ AttributeError: Mock object has no attribute 'a'
+
+There are a few different ways of resolving this problem. The easiest, but
+not necessarily the least annoying, way is to simply set the required
+attributes on the mock after creation. Just because `autospec` doesn't allow
+you to fetch attributes that don't exist on the spec it doesn't prevent you
+setting them:
+
+ >>> with patch('__main__.Something', autospec=True):
+ ... thing = Something()
+ ... thing.a = 33
+ ...
+
+There is a more aggressive version of both `spec` and `autospec` that *does*
+prevent you setting non-existent attributes. This is useful if you want to
+ensure your code only *sets* valid attributes too, but obviously it prevents
+this particular scenario:
+
+ >>> with patch('__main__.Something', autospec=True, spec_set=True):
+ ... thing = Something()
+ ... thing.a = 33
+ ...
+ Traceback (most recent call last):
+ ...
+ AttributeError: Mock object has no attribute 'a'
+
+Probably the best way of solving the problem is to add class attributes as
+default values for instance members initialised in `__init__`. Note that if
+you are only setting default attributes in `__init__` then providing them via
+class attributes (shared between instances of course) is faster too. e.g.
+
+.. code-block:: python
+
+ class Something(object):
+ a = 33
+
+This brings up another issue. It is relatively common to provide a default
+value of `None` for members that will later be an object of a different type.
+`None` would be useless as a spec because it wouldn't let you access *any*
+attributes or methods on it. As `None` is *never* going to be useful as a
+spec, and probably indicates a member that will normally be of some other type,
+`autospec` doesn't use a spec for members that are set to `None`. These will
+just be ordinary mocks (well - `MagicMocks`):
+
+ >>> class Something(object):
+ ... member = None
+ ...
+ >>> mock = create_autospec(Something)
+ >>> mock.member.foo.bar.baz()
+ <MagicMock name='mock.member.foo.bar.baz()' id='...'>
+
+If modifying your production classes to add defaults isn't to your liking
+then there are more options. One of these is simply to use an instance as the
+spec rather than the class. The other is to create a subclass of the
+production class and add the defaults to the subclass without affecting the
+production class. Both of these require you to use an alternative object as
+the spec. Thankfully `patch` supports this - you can simply pass the
+alternative object as the `autospec` argument:
+
+ >>> class Something(object):
+ ... def __init__(self):
+ ... self.a = 33
+ ...
+ >>> class SomethingForTest(Something):
+ ... a = 33
+ ...
+ >>> p = patch('__main__.Something', autospec=SomethingForTest)
+ >>> mock = p.start()
+ >>> mock.a
+ <NonCallableMagicMock name='Something.a' spec='int' id='...'>
+
+
+.. [#] This only applies to classes or already instantiated objects. Calling
+ a mocked class to create a mock instance *does not* create a real instance.
+ It is only attribute lookups - along with calls to `dir` - that are done.
+
diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst
index 72a3a7b..2657ebd 100644
--- a/Doc/library/unittest.rst
+++ b/Doc/library/unittest.rst
@@ -792,11 +792,14 @@ Test cases
Run the test, collecting the result into the test result object passed as
*result*. If *result* is omitted or ``None``, a temporary result
object is created (by calling the :meth:`defaultTestResult` method) and
- used. The result object is not returned to :meth:`run`'s caller.
+ used. The result object is returned to :meth:`run`'s caller.
The same effect may be had by simply calling the :class:`TestCase`
instance.
+ .. versionchanged:: 3.3
+ Previous versions of ``run`` did not return the result. Neither did
+ calling an instance.
.. method:: skipTest(reason)
@@ -857,10 +860,11 @@ Test cases
| <TestCase.assertNotIsInstance>` | | |
+-----------------------------------------+-----------------------------+---------------+
- All the assert methods (except :meth:`assertRaises`,
- :meth:`assertRaisesRegex`, :meth:`assertWarns`, :meth:`assertWarnsRegex`)
- accept a *msg* argument that, if specified, is used as the error message on
- failure (see also :data:`longMessage`).
+ All the assert methods accept a *msg* argument that, if specified, is used
+ as the error message on failure (see also :data:`longMessage`).
+ Note that the *msg* keyword argument can be passed to :meth:`assertRaises`,
+ :meth:`assertRaisesRegex`, :meth:`assertWarns`, :meth:`assertWarnsRegex`
+ only when they are used as a context manager.
.. method:: assertEqual(first, second, msg=None)
@@ -955,7 +959,7 @@ Test cases
+---------------------------------------------------------+--------------------------------------+------------+
.. method:: assertRaises(exception, callable, *args, **kwds)
- assertRaises(exception)
+ assertRaises(exception, msg=None)
Test that an exception is raised when *callable* is called with any
positional or keyword arguments that are also passed to
@@ -964,12 +968,16 @@ Test cases
To catch any of a group of exceptions, a tuple containing the exception
classes may be passed as *exception*.
- If only the *exception* argument is given, returns a context manager so
- that the code under test can be written inline rather than as a function::
+ If only the *exception* and possibly the *msg* arguments are given,
+ returns a context manager so that the code under test can be written
+ inline rather than as a function::
with self.assertRaises(SomeException):
do_something()
+ When used as a context manager, :meth:`assertRaises` accepts the
+ additional keyword argument *msg*.
+
The context manager will store the caught exception object in its
:attr:`exception` attribute. This can be useful if the intention
is to perform additional checks on the exception raised::
@@ -986,9 +994,12 @@ Test cases
.. versionchanged:: 3.2
Added the :attr:`exception` attribute.
+ .. versionchanged:: 3.3
+ Added the *msg* keyword argument when used as a context manager.
+
.. method:: assertRaisesRegex(exception, regex, callable, *args, **kwds)
- assertRaisesRegex(exception, regex)
+ assertRaisesRegex(exception, regex, msg=None)
Like :meth:`assertRaises` but also tests that *regex* matches
on the string representation of the raised exception. *regex* may be
@@ -1005,12 +1016,16 @@ Test cases
.. versionadded:: 3.1
under the name ``assertRaisesRegexp``.
+
.. versionchanged:: 3.2
Renamed to :meth:`assertRaisesRegex`.
+ .. versionchanged:: 3.3
+ Added the *msg* keyword argument when used as a context manager.
+
.. method:: assertWarns(warning, callable, *args, **kwds)
- assertWarns(warning)
+ assertWarns(warning, msg=None)
Test that a warning is triggered when *callable* is called with any
positional or keyword arguments that are also passed to
@@ -1019,12 +1034,16 @@ Test cases
To catch any of a group of warnings, a tuple containing the warning
classes may be passed as *warning*.
- If only the *warning* argument is given, returns a context manager so
- that the code under test can be written inline rather than as a function::
+ If only the *warning* and possibly the *msg* arguments are given,
+ returns a context manager so that the code under test can be written
+ inline rather than as a function::
with self.assertWarns(SomeWarning):
do_something()
+ When used as a context manager, :meth:`assertWarns` accepts the
+ additional keyword argument *msg*.
+
The context manager will store the caught warning object in its
:attr:`warning` attribute, and the source line which triggered the
warnings in the :attr:`filename` and :attr:`lineno` attributes.
@@ -1042,9 +1061,12 @@ Test cases
.. versionadded:: 3.2
+ .. versionchanged:: 3.3
+ Added the *msg* keyword argument when used as a context manager.
+
.. method:: assertWarnsRegex(warning, regex, callable, *args, **kwds)
- assertWarnsRegex(warning, regex)
+ assertWarnsRegex(warning, regex, msg=None)
Like :meth:`assertWarns` but also tests that *regex* matches on the
message of the triggered warning. *regex* may be a regular expression
@@ -1062,6 +1084,8 @@ Test cases
.. versionadded:: 3.2
+ .. versionchanged:: 3.3
+ Added the *msg* keyword argument when used as a context manager.
There are also other methods used to perform more specific checks, such as:
@@ -1151,21 +1175,6 @@ Test cases
:meth:`.assertNotRegex`.
- .. method:: assertDictContainsSubset(subset, dictionary, msg=None)
-
- Tests whether the key/value pairs in *dictionary* are a superset of
- those in *subset*. If not, an error message listing the missing keys
- and mismatched values is generated.
-
- Note, the arguments are in the opposite order of what the method name
- dictates. Instead, consider using the set-methods on :ref:`dictionary
- views <dict-views>`, for example: ``d.keys() <= e.keys()`` or
- ``d.items() <= d.items()``.
-
- .. versionadded:: 3.1
- .. deprecated:: 3.2
-
-
.. method:: assertCountEqual(first, second, msg=None)
Test that sequence *first* contains the same elements as *second*,
@@ -1180,21 +1189,6 @@ Test cases
.. versionadded:: 3.2
- .. method:: assertSameElements(first, second, msg=None)
-
- Test that sequence *first* contains the same elements as *second*,
- regardless of their order. When they don't, an error message listing
- the differences between the sequences will be generated.
-
- Duplicate elements are ignored when comparing *first* and *second*.
- It is the equivalent of ``assertEqual(set(first), set(second))``
- but it works with sequences of unhashable objects as well. Because
- duplicates are ignored, this method has been deprecated in favour of
- :meth:`assertCountEqual`.
-
- .. versionadded:: 3.1
- .. deprecated:: 3.2
-
.. _type-specific-methods:
diff --git a/Doc/library/urllib.error.rst b/Doc/library/urllib.error.rst
index 282329f..e20db27 100644
--- a/Doc/library/urllib.error.rst
+++ b/Doc/library/urllib.error.rst
@@ -8,21 +8,23 @@
The :mod:`urllib.error` module defines the exception classes for exceptions
-raised by :mod:`urllib.request`. The base exception class is :exc:`URLError`,
-which inherits from :exc:`IOError`.
+raised by :mod:`urllib.request`. The base exception class is :exc:`URLError`.
The following exceptions are raised by :mod:`urllib.error` as appropriate:
.. exception:: URLError
The handlers raise this exception (or derived exceptions) when they run into
- a problem. It is a subclass of :exc:`IOError`.
+ a problem. It is a subclass of :exc:`OSError`.
.. attribute:: reason
The reason for this error. It can be a message string or another
- exception instance (:exc:`socket.error` for remote URLs, :exc:`OSError`
- for local URLs).
+ exception instance.
+
+ .. versionchanged:: 3.3
+ :exc:`URLError` has been made a subclass of :exc:`OSError` instead
+ of :exc:`IOError`.
.. exception:: HTTPError
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index bc1da62..208cb97 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -81,8 +81,7 @@ or on combining URL components into a URL string.
this argument is the empty string.
If the *allow_fragments* argument is false, fragment identifiers are not
- allowed, even if the URL's addressing scheme normally does support them. The
- default value for this argument is :const:`True`.
+ allowed. The default value for this argument is :const:`True`.
The return value is actually an instance of a subclass of :class:`tuple`. This
class has the following additional read-only convenience attributes:
@@ -119,6 +118,11 @@ or on combining URL components into a URL string.
.. versionchanged:: 3.2
Added IPv6 URL parsing capabilities.
+ .. versionchanged:: 3.3
+ The fragment is now parsed for all URL schemes (unless *allow_fragments* is
+ false), in accordance with :rfc:`3986`. Previously, a whitelist of
+ schemes that support fragments existed.
+
.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace')
@@ -141,8 +145,9 @@ or on combining URL components into a URL string.
percent-encoded sequences into Unicode characters, as accepted by the
:meth:`bytes.decode` method.
- Use the :func:`urllib.parse.urlencode` function to convert such
- dictionaries into query strings.
+ Use the :func:`urllib.parse.urlencode` function (with the ``doseq``
+ parameter set to ``True``) to convert such dictionaries into query
+ strings.
.. versionchanged:: 3.2
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index b4764cc..898fe71 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -16,7 +16,7 @@ authentication, redirections, cookies and more.
The :mod:`urllib.request` module defines the following functions:
-.. function:: urlopen(url, data=None[, timeout], *, cafile=None, capath=None)
+.. function:: urlopen(url, data=None[, timeout], *, cafile=None, capath=None, cadefault=True)
Open the URL *url*, which can be either a string or a
:class:`Request` object.
@@ -53,9 +53,15 @@ The :mod:`urllib.request` module defines the following functions:
point to a directory of hashed certificate files. More information can
be found in :meth:`ssl.SSLContext.load_verify_locations`.
+ The *cadefault* parameter specifies whether to fall back to loading a
+ default certificate store defined by the underlying OpenSSL library if the
+ *cafile* and *capath* parameters are omitted. This will only work on
+ some non-Windows platforms.
+
.. warning::
- If neither *cafile* nor *capath* is specified, an HTTPS request
- will not do any verification of the server's certificate.
+ If neither *cafile* nor *capath* is specified, and *cadefault* is False,
+ an HTTPS request will not do any verification of the server's
+ certificate.
This function returns a file-like object that works as a :term:`context manager`,
with two additional methods from the :mod:`urllib.response` module
@@ -92,6 +98,9 @@ The :mod:`urllib.request` module defines the following functions:
.. versionadded:: 3.2
*data* can be an iterable object.
+ .. versionchanged:: 3.3
+ *cadefault* was added.
+
.. function:: install_opener(opener)
Install an :class:`OpenerDirector` instance as the default global opener.
@@ -145,7 +154,7 @@ The :mod:`urllib.request` module defines the following functions:
The following classes are provided:
-.. class:: Request(url, data=None, headers={}, origin_req_host=None, unverifiable=False)
+.. class:: Request(url, data=None, headers={}, origin_req_host=None, unverifiable=False, method=None)
This class is an abstraction of a URL request.
@@ -198,6 +207,13 @@ The following classes are provided:
document, and the user had no option to approve the automatic
fetching of the image, this should be true.
+ *method* should be a string that indicates the HTTP request method that
+ will be used (e.g. ``'HEAD'``). Its value is stored in the
+ :attr:`~Request.method` attribute and is used by :meth:`get_method()`.
+
+ .. versionchanged:: 3.3
+ The *method* argument was added to the :class:`Request` class.
+
.. class:: OpenerDirector()
@@ -263,10 +279,11 @@ The following classes are provided:
.. class:: HTTPBasicAuthHandler(password_mgr=None)
- Handle authentication with the remote host. *password_mgr*, if given, should be
- something that is compatible with :class:`HTTPPasswordMgr`; refer to section
- :ref:`http-password-mgr` for information on the interface that must be
- supported.
+ Handle authentication with the remote host. *password_mgr*, if given, should
+ be something that is compatible with :class:`HTTPPasswordMgr`; refer to
+ section :ref:`http-password-mgr` for information on the interface that must
+ be supported. HTTPBasicAuthHandler will raise a :exc:`ValueError` when
+ presented with a wrong Authentication scheme.
.. class:: ProxyBasicAuthHandler(password_mgr=None)
@@ -288,10 +305,19 @@ The following classes are provided:
.. class:: HTTPDigestAuthHandler(password_mgr=None)
- Handle authentication with the remote host. *password_mgr*, if given, should be
- something that is compatible with :class:`HTTPPasswordMgr`; refer to section
- :ref:`http-password-mgr` for information on the interface that must be
- supported.
+ Handle authentication with the remote host. *password_mgr*, if given, should
+ be something that is compatible with :class:`HTTPPasswordMgr`; refer to
+ section :ref:`http-password-mgr` for information on the interface that must
+ be supported. When Digest Authentication Handler and Basic
+ Authentication Handler are both added, Digest Authentication is always tried
+ first. If the Digest Authentication returns a 40x response again, it is sent
+ to the Basic Authentication Handler to handle. This handler will raise a
+ :exc:`ValueError` when presented with an authentication scheme other than
+ Digest or Basic.
+
+ .. versionchanged:: 3.3
+ Raise :exc:`ValueError` on unsupported Authentication Scheme.
+
.. class:: ProxyDigestAuthHandler(password_mgr=None)
@@ -382,27 +408,25 @@ request.
boolean, indicates whether the request is unverifiable as defined
by RFC 2965.
-.. method:: Request.add_data(data)
-
- Set the :class:`Request` data to *data*. This is ignored by all handlers except
- HTTP handlers --- and there it should be a byte string, and will change the
- request to be ``POST`` rather than ``GET``.
-
-
-.. method:: Request.get_method()
-
- Return a string indicating the HTTP request method. This is only meaningful for
- HTTP requests, and currently always returns ``'GET'`` or ``'POST'``.
+.. attribute:: Request.method
+ The HTTP request method to use. This value is used by
+ :meth:`~Request.get_method` to override the computed HTTP request
+ method that would otherwise be returned. This attribute is initialized with
+ the value of the *method* argument passed to the constructor.
-.. method:: Request.has_data()
+ .. versionadded:: 3.3
- Return whether the instance has a non-\ ``None`` data.
+.. method:: Request.get_method()
-.. method:: Request.get_data()
+ Return a string indicating the HTTP request method. If
+ :attr:`Request.method` is not ``None``, return its value, otherwise return
+ ``'GET'`` if :attr:`Request.data` is ``None``, or ``'POST'`` if it's not.
+ This is only meaningful for HTTP requests.
- Return the instance's data.
+ .. versionchanged:: 3.3
+ get_method now looks at the value of :attr:`Request.method`.
.. method:: Request.add_header(key, val)
@@ -432,20 +456,60 @@ request.
Return the URL given in the constructor.
+.. method:: Request.set_proxy(host, type)
+
+ Prepare the request by connecting to a proxy server. The *host* and *type* will
+ replace those of the instance, and the instance's selector will be the original
+ URL given in the constructor.
+
+
+.. method:: Request.add_data(data)
+
+ Set the :class:`Request` data to *data*. This is ignored by all handlers except
+ HTTP handlers --- and there it should be a byte string, and will change the
+ request to be ``POST`` rather than ``GET``. Deprecated in 3.3, use
+ :attr:`Request.data`.
+
+ .. deprecated:: 3.3
+
+
+.. method:: Request.has_data()
+
+ Return whether the instance has a non-\ ``None`` data. Deprecated in 3.3,
+ use :attr:`Request.data`.
+
+ .. deprecated:: 3.3
+
+
+.. method:: Request.get_data()
+
+ Return the instance's data. Deprecated in 3.3, use :attr:`Request.data`.
+
+ .. deprecated:: 3.3
+
+
.. method:: Request.get_type()
- Return the type of the URL --- also known as the scheme.
+ Return the type of the URL --- also known as the scheme. Deprecated in 3.3,
+ use :attr:`Request.type`.
+
+ .. deprecated:: 3.3
.. method:: Request.get_host()
- Return the host to which a connection will be made.
+ Return the host to which a connection will be made. Deprecated in 3.3, use
+ :attr:`Request.host`.
+
+ .. deprecated:: 3.3
.. method:: Request.get_selector()
Return the selector --- the part of the URL that is sent to the server.
+ Deprecated in 3.3, use :attr:`Request.selector`.
+ .. deprecated:: 3.3
.. method:: Request.get_header(header_name, default=None)
@@ -460,21 +524,22 @@ request.
.. method:: Request.set_proxy(host, type)
- Prepare the request by connecting to a proxy server. The *host* and *type* will
- replace those of the instance, and the instance's selector will be the original
- URL given in the constructor.
-
-
.. method:: Request.get_origin_req_host()
- Return the request-host of the origin transaction, as defined by :rfc:`2965`.
- See the documentation for the :class:`Request` constructor.
+ Return the request-host of the origin transaction, as defined by
+ :rfc:`2965`. See the documentation for the :class:`Request` constructor.
+ Deprecated in 3.3, use :attr:`Request.origin_req_host`.
+
+ .. deprecated:: 3.3
.. method:: Request.is_unverifiable()
Return whether the request is unverifiable, as defined by RFC 2965. See the
- documentation for the :class:`Request` constructor.
+ documentation for the :class:`Request` constructor. Deprecated in 3.3, use
+ :attr:`Request.unverifiable`.
+
+ .. deprecated:: 3.3
.. _opener-director-objects:
@@ -1138,16 +1203,14 @@ The following functions and classes are ported from the Python 2 module
``urllib`` (as opposed to ``urllib2``). They might become deprecated at
some point in the future.
-
.. function:: urlretrieve(url, filename=None, reporthook=None, data=None)
- Copy a network object denoted by a URL to a local file, if necessary. If the URL
- points to a local file, or a valid cached copy of the object exists, the object
- is not copied. Return a tuple ``(filename, headers)`` where *filename* is the
+ Copy a network object denoted by a URL to a local file. If the URL
+ points to a local file, the object will not be copied unless filename is supplied.
+ Return a tuple ``(filename, headers)`` where *filename* is the
local file name under which the object can be found, and *headers* is whatever
the :meth:`info` method of the object returned by :func:`urlopen` returned (for
- a remote object, possibly cached). Exceptions are the same as for
- :func:`urlopen`.
+ a remote object). Exceptions are the same as for :func:`urlopen`.
The second argument, if present, specifies the file location to copy to (if
absent, the location will be a tempfile with a generated name). The third
@@ -1158,6 +1221,13 @@ some point in the future.
third argument may be ``-1`` on older FTP servers which do not return a file
size in response to a retrieval request.
+ The following example illustrates the most common usage scenario::
+
+ >>> import urllib.request
+ >>> local_filename, headers = urllib.request.urlretrieve('http://python.org/')
+ >>> html = open(local_filename)
+ >>> html.close()
+
If the *url* uses the :file:`http:` scheme identifier, the optional *data*
argument may be given to specify a ``POST`` request (normally the request
type is ``GET``). The *data* argument must be a bytes object in standard
@@ -1170,20 +1240,20 @@ some point in the future.
the download is interrupted.
The *Content-Length* is treated as a lower bound: if there's more data to read,
- :func:`urlretrieve` reads more data, but if less data is available, it raises
- the exception.
+ urlretrieve reads more data, but if less data is available, it raises the
+ exception.
You can still retrieve the downloaded data in this case, it is stored in the
:attr:`content` attribute of the exception instance.
- If no *Content-Length* header was supplied, :func:`urlretrieve` can not check
- the size of the data it has downloaded, and just returns it. In this case
- you just have to assume that the download was successful.
+ If no *Content-Length* header was supplied, urlretrieve can not check the size
+ of the data it has downloaded, and just returns it. In this case you just have
+ to assume that the download was successful.
.. function:: urlcleanup()
- Clear the cache that may have been built up by previous calls to
- :func:`urlretrieve`.
+ Cleans up temporary files that may have been left behind by previous
+ calls to :func:`urlretrieve`.
.. class:: URLopener(proxies=None, **x509)
@@ -1207,7 +1277,7 @@ some point in the future.
*key_file* and *cert_file* are supported to provide an SSL key and certificate;
both are needed to support client authentication.
- :class:`URLopener` objects will raise an :exc:`IOError` exception if the server
+ :class:`URLopener` objects will raise an :exc:`OSError` exception if the server
returns an error code.
.. method:: open(fullurl, data=None)
diff --git a/Doc/library/urllib.rst b/Doc/library/urllib.rst
new file mode 100644
index 0000000..9ca74b8
--- /dev/null
+++ b/Doc/library/urllib.rst
@@ -0,0 +1,9 @@
+:mod:`urllib` --- URL handling modules
+======================================
+
+``urllib`` is a package that collects several modules for working with URLs:
+
+* :mod:`urllib.request` for opening and reading URLs
+* :mod:`urllib.error` containing the exceptions raised by :mod:`urllib.request`
+* :mod:`urllib.parse` for parsing URLs
+* :mod:`urllib.robotparser` for parsing ``robots.txt`` files
diff --git a/Doc/library/venv.rst b/Doc/library/venv.rst
new file mode 100644
index 0000000..2499962
--- /dev/null
+++ b/Doc/library/venv.rst
@@ -0,0 +1,180 @@
+:mod:`venv` --- Creation of virtual environments
+================================================
+
+.. module:: venv
+ :synopsis: Creation of virtual environments.
+.. moduleauthor:: Vinay Sajip <vinay_sajip@yahoo.co.uk>
+.. sectionauthor:: Vinay Sajip <vinay_sajip@yahoo.co.uk>
+
+
+.. index:: pair: Environments; virtual
+
+.. versionadded:: 3.3
+
+**Source code:** :source:`Lib/venv.py`
+
+--------------
+
+The :mod:`venv` module provides support for creating lightweight "virtual
+environments" with their own site directories, optionally isolated from system
+site directories. Each virtual environment has its own Python binary (allowing
+creation of environments with various Python versions) and can have its own
+independent set of installed Python packages in its site directories.
+
+
+Creating virtual environments
+-----------------------------
+
+.. include:: /using/venv-create.inc
+
+
+.. _venv-def:
+
+.. note:: A virtual environment (also called a ``venv``) is a Python
+ environment such that the Python interpreter, libraries and scripts
+ installed into it are isolated from those installed in other virtual
+ environments, and (by default) any libraries installed in a "system" Python,
+ i.e. one which is installed as part of your operating system.
+
+ A venv is a directory tree which contains Python executable files and
+ other files which indicate that it is a venv.
+
+ Common installation tools such as ``Distribute`` and ``pip`` work as
+ expected with venvs - i.e. when a venv is active, they install Python
+ packages into the venv without needing to be told to do so explicitly.
+ Of course, you need to install them into the venv first: this could be
+ done by running ``distribute_setup.py`` with the venv activated,
+ followed by running ``easy_install pip``. Alternatively, you could download
+ the source tarballs and run ``python setup.py install`` after unpacking,
+ with the venv activated.
+
+ When a venv is active (i.e. the venv's Python interpreter is running), the
+ attributes :attr:`sys.prefix` and :attr:`sys.exec_prefix` point to the base
+ directory of the venv, whereas :attr:`sys.base_prefix` and
+ :attr:`sys.base_exec_prefix` point to the non-venv Python installation
+ which was used to create the venv. If a venv is not active, then
+ :attr:`sys.prefix` is the same as :attr:`sys.base_prefix` and
+ :attr:`sys.exec_prefix` is the same as :attr:`sys.base_exec_prefix` (they
+ all point to a non-venv Python installation).
+
+
+API
+---
+
+.. highlight:: python
+
+The high-level method described above makes use of a simple API which provides
+mechanisms for third-party virtual environment creators to customize environment
+creation according to their needs, the :class:`EnvBuilder` class.
+
+.. class:: EnvBuilder(system_site_packages=False, clear=False, symlinks=False, upgrade=False)
+
+ The :class:`EnvBuilder` class accepts the following keyword arguments on
+ instantiation:
+
+ * ``system_site_packages`` -- a Boolean value indicating that the system Python
+ site-packages should be available to the environment (defaults to ``False``).
+
+ * ``clear`` -- a Boolean value which, if True, will delete any existing target
+ directory instead of raising an exception (defaults to ``False``).
+
+ * ``symlinks`` -- a Boolean value indicating whether to attempt to symlink the
+ Python binary (and any necessary DLLs or other binaries,
+ e.g. ``pythonw.exe``), rather than copying. Defaults to ``True`` on Linux and
+ Unix systems, but ``False`` on Windows.
+
+ * ``upgrade`` -- a Boolean value which, if True, will upgrade an existing
+ environment with the running Python - for use when that Python has been
+ upgraded in-place (defaults to ``False``).
+
+
+
+ Creators of third-party virtual environment tools will be free to use the
+ provided ``EnvBuilder`` class as a base class.
+
+ The returned env-builder is an object which has a method, ``create``:
+
+ .. method:: create(env_dir)
+
+ This method takes as required argument the path (absolute or relative to
+ the current directory) of the target directory which is to contain the
+ virtual environment. The ``create`` method will either create the
+ environment in the specified directory, or raise an appropriate
+ exception.
+
+ The ``create`` method of the ``EnvBuilder`` class illustrates the hooks
+ available for subclass customization::
+
+ def create(self, env_dir):
+ """
+ Create a virtualized Python environment in a directory.
+ env_dir is the target directory to create an environment in.
+ """
+ env_dir = os.path.abspath(env_dir)
+ context = self.create_directories(env_dir)
+ self.create_configuration(context)
+ self.setup_python(context)
+ self.setup_scripts(context)
+ self.post_setup(context)
+
+ Each of the methods :meth:`create_directories`,
+ :meth:`create_configuration`, :meth:`setup_python`,
+ :meth:`setup_scripts` and :meth:`post_setup` can be overridden.
+
+ .. method:: create_directories(env_dir)
+
+ Creates the environment directory and all necessary directories, and
+ returns a context object. This is just a holder for attributes (such as
+ paths), for use by the other methods.
+
+ .. method:: create_configuration(context)
+
+ Creates the ``pyvenv.cfg`` configuration file in the environment.
+
+ .. method:: setup_python(context)
+
+ Creates a copy of the Python executable (and, under Windows, DLLs) in
+ the environment.
+
+ .. method:: setup_scripts(context)
+
+ Installs activation scripts appropriate to the platform into the virtual
+ environment.
+
+ .. method:: post_setup(context)
+
+ A placeholder method which can be overridden in third party
+ implementations to pre-install packages in the virtual environment or
+ perform other post-creation steps.
+
+ In addition, :class:`EnvBuilder` provides this utility method that can be
+ called from :meth:`setup_scripts` or :meth:`post_setup` in subclasses to
+ assist in installing custom scripts into the virtual environment.
+
+ .. method:: install_scripts(context, path)
+
+ *path* is the path to a directory that should contain subdirectories
+ "common", "posix", "nt", each containing scripts destined for the bin
+ directory in the environment. The contents of "common" and the
+ directory corresponding to :data:`os.name` are copied after some text
+ replacement of placeholders:
+
+ * ``__VENV_DIR__`` is replaced with the absolute path of the environment
+ directory.
+
+ * ``__VENV_NAME__`` is replaced with the environment name (final path
+ segment of environment directory).
+
+ * ``__VENV_BIN_NAME__`` is replaced with the name of the bin directory
+ (either ``bin`` or ``Scripts``).
+
+ * ``__VENV_PYTHON__`` is replaced with the absolute path of the
+ environment's executable.
+
+
+There is also a module-level convenience function:
+
+.. function:: create(env_dir, system_site_packages=False, clear=False, symlinks=False)
+
+ Create an :class:`EnvBuilder` with the given keyword arguments, and call its
+ :meth:`~EnvBuilder.create` method with the *env_dir* argument.
diff --git a/Doc/library/warnings.rst b/Doc/library/warnings.rst
index 8af19a2..8387f5a 100644
--- a/Doc/library/warnings.rst
+++ b/Doc/library/warnings.rst
@@ -339,8 +339,7 @@ Available Functions
Write a warning to a file. The default implementation calls
``formatwarning(message, category, filename, lineno, line)`` and writes the
resulting string to *file*, which defaults to ``sys.stderr``. You may replace
- this function with an alternative implementation by assigning to
- ``warnings.showwarning``.
+ this function with any callable by assigning to ``warnings.showwarning``.
*line* is a line of source code to be included in the warning
message; if *line* is not supplied, :func:`showwarning` will
try to read the line specified by *filename* and *lineno*.
diff --git a/Doc/library/webbrowser.rst b/Doc/library/webbrowser.rst
index b2dcf8f..9c2b3ab 100644
--- a/Doc/library/webbrowser.rst
+++ b/Doc/library/webbrowser.rst
@@ -98,47 +98,55 @@ A number of browser types are predefined. This table gives the type names that
may be passed to the :func:`get` function and the corresponding instantiations
for the controller classes, all defined in this module.
-+-----------------------+-----------------------------------------+-------+
-| Type Name | Class Name | Notes |
-+=======================+=========================================+=======+
-| ``'mozilla'`` | :class:`Mozilla('mozilla')` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'firefox'`` | :class:`Mozilla('mozilla')` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'netscape'`` | :class:`Mozilla('netscape')` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'galeon'`` | :class:`Galeon('galeon')` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'epiphany'`` | :class:`Galeon('epiphany')` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'skipstone'`` | :class:`BackgroundBrowser('skipstone')` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'kfmclient'`` | :class:`Konqueror()` | \(1) |
-+-----------------------+-----------------------------------------+-------+
-| ``'konqueror'`` | :class:`Konqueror()` | \(1) |
-+-----------------------+-----------------------------------------+-------+
-| ``'kfm'`` | :class:`Konqueror()` | \(1) |
-+-----------------------+-----------------------------------------+-------+
-| ``'mosaic'`` | :class:`BackgroundBrowser('mosaic')` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'opera'`` | :class:`Opera()` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'grail'`` | :class:`Grail()` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'links'`` | :class:`GenericBrowser('links')` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'elinks'`` | :class:`Elinks('elinks')` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'lynx'`` | :class:`GenericBrowser('lynx')` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'w3m'`` | :class:`GenericBrowser('w3m')` | |
-+-----------------------+-----------------------------------------+-------+
-| ``'windows-default'`` | :class:`WindowsDefault` | \(2) |
-+-----------------------+-----------------------------------------+-------+
-| ``'macosx'`` | :class:`MacOSX('default')` | \(3) |
-+-----------------------+-----------------------------------------+-------+
-| ``'safari'`` | :class:`MacOSX('safari')` | \(3) |
-+-----------------------+-----------------------------------------+-------+
++------------------------+-----------------------------------------+-------+
+| Type Name | Class Name | Notes |
++========================+=========================================+=======+
+| ``'mozilla'`` | :class:`Mozilla('mozilla')` | |
++------------------------+-----------------------------------------+-------+
+| ``'firefox'`` | :class:`Mozilla('mozilla')` | |
++------------------------+-----------------------------------------+-------+
+| ``'netscape'`` | :class:`Mozilla('netscape')` | |
++------------------------+-----------------------------------------+-------+
+| ``'galeon'`` | :class:`Galeon('galeon')` | |
++------------------------+-----------------------------------------+-------+
+| ``'epiphany'`` | :class:`Galeon('epiphany')` | |
++------------------------+-----------------------------------------+-------+
+| ``'skipstone'`` | :class:`BackgroundBrowser('skipstone')` | |
++------------------------+-----------------------------------------+-------+
+| ``'kfmclient'`` | :class:`Konqueror()` | \(1) |
++------------------------+-----------------------------------------+-------+
+| ``'konqueror'`` | :class:`Konqueror()` | \(1) |
++------------------------+-----------------------------------------+-------+
+| ``'kfm'`` | :class:`Konqueror()` | \(1) |
++------------------------+-----------------------------------------+-------+
+| ``'mosaic'`` | :class:`BackgroundBrowser('mosaic')` | |
++------------------------+-----------------------------------------+-------+
+| ``'opera'`` | :class:`Opera()` | |
++------------------------+-----------------------------------------+-------+
+| ``'grail'`` | :class:`Grail()` | |
++------------------------+-----------------------------------------+-------+
+| ``'links'`` | :class:`GenericBrowser('links')` | |
++------------------------+-----------------------------------------+-------+
+| ``'elinks'`` | :class:`Elinks('elinks')` | |
++------------------------+-----------------------------------------+-------+
+| ``'lynx'`` | :class:`GenericBrowser('lynx')` | |
++------------------------+-----------------------------------------+-------+
+| ``'w3m'`` | :class:`GenericBrowser('w3m')` | |
++------------------------+-----------------------------------------+-------+
+| ``'windows-default'`` | :class:`WindowsDefault` | \(2) |
++------------------------+-----------------------------------------+-------+
+| ``'macosx'`` | :class:`MacOSX('default')` | \(3) |
++------------------------+-----------------------------------------+-------+
+| ``'safari'`` | :class:`MacOSX('safari')` | \(3) |
++------------------------+-----------------------------------------+-------+
+| ``'google-chrome'`` | :class:`Chrome('google-chrome')` | |
++------------------------+-----------------------------------------+-------+
+| ``'chrome'`` | :class:`Chrome('chrome')` | |
++------------------------+-----------------------------------------+-------+
+| ``'chromium'`` | :class:`Chromium('chromium')` | |
++------------------------+-----------------------------------------+-------+
+| ``'chromium-browser'`` | :class:`Chromium('chromium-browser')` | |
++------------------------+-----------------------------------------+-------+
Notes:
@@ -155,12 +163,15 @@ Notes:
(3)
Only on Mac OS X platform.
+.. versionadded:: 3.3
+ Support for Chrome/Chromium has been added.
+
Here are some simple examples::
- url = 'http://www.python.org/'
+ url = 'http://docs.python.org/'
# Open URL in a new tab, if a browser window is already open.
- webbrowser.open_new_tab(url + 'doc/')
+ webbrowser.open_new_tab(url)
# Open URL in new window, raising the window if possible.
webbrowser.open_new(url)
diff --git a/Doc/library/winreg.rst b/Doc/library/winreg.rst
index e49b51b..a878da2 100644
--- a/Doc/library/winreg.rst
+++ b/Doc/library/winreg.rst
@@ -38,7 +38,11 @@ This module offers the following functions:
*key* is the predefined handle to connect to.
The return value is the handle of the opened key. If the function fails, a
- :exc:`WindowsError` exception is raised.
+ :exc:`OSError` exception is raised.
+
+ .. versionchanged:: 3.3
+ This function used to raise a :exc:`WindowsError`, which is now an
+ alias of :exc:`OSError`.
.. function:: CreateKey(key, sub_key)
@@ -57,7 +61,11 @@ This module offers the following functions:
If the key already exists, this function opens the existing key.
The return value is the handle of the opened key. If the function fails, a
- :exc:`WindowsError` exception is raised.
+ :exc:`OSError` exception is raised.
+
+ .. versionchanged:: 3.3
+ This function used to raise a :exc:`WindowsError`, which is now an
+ alias of :exc:`OSError`.
.. function:: CreateKeyEx(key, sub_key, reserved=0, access=KEY_ALL_ACCESS)
@@ -82,10 +90,14 @@ This module offers the following functions:
If the key already exists, this function opens the existing key.
The return value is the handle of the opened key. If the function fails, a
- :exc:`WindowsError` exception is raised.
+ :exc:`OSError` exception is raised.
.. versionadded:: 3.2
+ .. versionchanged:: 3.3
+ This function used to raise a :exc:`WindowsError`, which is now an
+ alias of :exc:`OSError`.
+
.. function:: DeleteKey(key, sub_key)
@@ -100,7 +112,11 @@ This module offers the following functions:
*This method can not delete keys with subkeys.*
If the method succeeds, the entire key, including all of its values, is removed.
- If the method fails, a :exc:`WindowsError` exception is raised.
+ If the method fails, an :exc:`OSError` exception is raised.
+
+ .. versionchanged:: 3.3
+ This function used to raise a :exc:`WindowsError`, which is now an
+ alias of :exc:`OSError`.
.. function:: DeleteKeyEx(key, sub_key, access=KEY_ALL_ACCESS, reserved=0)
@@ -129,12 +145,16 @@ This module offers the following functions:
*This method can not delete keys with subkeys.*
If the method succeeds, the entire key, including all of its values, is
- removed. If the method fails, a :exc:`WindowsError` exception is raised.
+ removed. If the method fails, an :exc:`OSError` exception is raised.
On unsupported Windows versions, :exc:`NotImplementedError` is raised.
.. versionadded:: 3.2
+ .. versionchanged:: 3.3
+ This function used to raise a :exc:`WindowsError`, which is now an
+ alias of :exc:`OSError`.
+
.. function:: DeleteValue(key, value)
@@ -156,9 +176,13 @@ This module offers the following functions:
*index* is an integer that identifies the index of the key to retrieve.
The function retrieves the name of one subkey each time it is called. It is
- typically called repeatedly until a :exc:`WindowsError` exception is
+ typically called repeatedly until an :exc:`OSError` exception is
raised, indicating, no more values are available.
+ .. versionchanged:: 3.3
+ This function used to raise a :exc:`WindowsError`, which is now an
+ alias of :exc:`OSError`.
+
.. function:: EnumValue(key, index)
@@ -170,7 +194,7 @@ This module offers the following functions:
*index* is an integer that identifies the index of the value to retrieve.
The function retrieves the name of one subkey each time it is called. It is
- typically called repeatedly, until a :exc:`WindowsError` exception is
+ typically called repeatedly, until an :exc:`OSError` exception is
raised, indicating no more values.
The result is a tuple of 3 items:
@@ -189,6 +213,10 @@ This module offers the following functions:
| | :meth:`SetValueEx`) |
+-------+--------------------------------------------+
+ .. versionchanged:: 3.3
+ This function used to raise a :exc:`WindowsError`, which is now an
+ alias of :exc:`OSError`.
+
.. function:: ExpandEnvironmentStrings(str)
@@ -260,9 +288,14 @@ This module offers the following functions:
The result is a new handle to the specified key.
- If the function fails, :exc:`WindowsError` is raised.
+ If the function fails, :exc:`OSError` is raised.
+
+ .. versionchanged:: 3.2
+ Allow the use of named arguments.
- .. versionchanged:: 3.2 Allow the use of named arguments.
+ .. versionchanged:: 3.3
+ This function used to raise a :exc:`WindowsError`, which is now an
+ alias of :exc:`OSError`.
.. function:: OpenKeyEx()
diff --git a/Doc/library/wsgiref.rst b/Doc/library/wsgiref.rst
index 1fd3451..1cef2e9 100644
--- a/Doc/library/wsgiref.rst
+++ b/Doc/library/wsgiref.rst
@@ -609,6 +609,9 @@ input, output, and error streams.
as :class:`BaseCGIHandler` and :class:`CGIHandler`) that are not HTTP origin
servers.
+ .. versionchanged:: 3.3
+ The term "Python" is replaced with an implementation-specific term such as
+ "CPython", "Jython", etc.
.. method:: BaseHandler.get_scheme()
diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst
index ae286b0..a5c6fb2 100644
--- a/Doc/library/xml.dom.minidom.rst
+++ b/Doc/library/xml.dom.minidom.rst
@@ -147,12 +147,7 @@ module documentation. This section lists the differences between the API and
the DOM node.
With an explicit *encoding* [1]_ argument, the result is a byte
- string in the specified encoding. It is recommended that you
- always specify an encoding; you may use any encoding you like, but
- an argument of "utf-8" is the most common choice, avoiding
- :exc:`UnicodeError` exceptions in case of unrepresentable text
- data.
-
+ string in the specified encoding.
With no *encoding* argument, the result is a Unicode string, and the
XML declaration in the resulting string does not specify an
encoding. Encoding this string in an encoding other than UTF-8 is
diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
index c5c8802..26f1fbe 100644
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -5,49 +5,323 @@
:synopsis: Implementation of the ElementTree API.
.. moduleauthor:: Fredrik Lundh <fredrik@pythonware.com>
-**Source code:** :source:`Lib/xml/etree/ElementTree.py`
+The :mod:`xml.etree.ElementTree` module implements a simple and efficient API
+for parsing and creating XML data.
+
+.. versionchanged:: 3.3
+ This module will use a fast implementation whenever available.
+ The :mod:`xml.etree.cElementTree` module is deprecated.
+
+Tutorial
+--------
+
+This is a short tutorial for using :mod:`xml.etree.ElementTree` (``ET`` in
+short). The goal is to demonstrate some of the building blocks and basic
+concepts of the module.
+
+XML tree and elements
+^^^^^^^^^^^^^^^^^^^^^
+
+XML is an inherently hierarchical data format, and the most natural way to
+represent it is with a tree. ``ET`` has two classes for this purpose -
+:class:`ElementTree` represents the whole XML document as a tree, and
+:class:`Element` represents a single node in this tree. Interactions with
+the whole document (reading and writing to/from files) are usually done
+on the :class:`ElementTree` level. Interactions with a single XML element
+and its sub-elements are done on the :class:`Element` level.
+
+.. _elementtree-parsing-xml:
+
+Parsing XML
+^^^^^^^^^^^
+
+We'll be using the following XML document as the sample data for this section:
+
+.. code-block:: xml
+
+ <?xml version="1.0"?>
+ <data>
+ <country name="Liechtenstein">
+ <rank>1</rank>
+ <year>2008</year>
+ <gdppc>141100</gdppc>
+ <neighbor name="Austria" direction="E"/>
+ <neighbor name="Switzerland" direction="W"/>
+ </country>
+ <country name="Singapore">
+ <rank>4</rank>
+ <year>2011</year>
+ <gdppc>59900</gdppc>
+ <neighbor name="Malaysia" direction="N"/>
+ </country>
+ <country name="Panama">
+ <rank>68</rank>
+ <year>2011</year>
+ <gdppc>13600</gdppc>
+ <neighbor name="Costa Rica" direction="W"/>
+ <neighbor name="Colombia" direction="E"/>
+ </country>
+ </data>
+
+We can import this data by reading from a file::
+
+ import xml.etree.ElementTree as ET
+ tree = ET.parse('country_data.xml')
+ root = tree.getroot()
+
+Or directly from a string::
+
+ root = ET.fromstring(country_data_as_string)
+
+:func:`fromstring` parses XML from a string directly into an :class:`Element`,
+which is the root element of the parsed tree. Other parsing functions may
+create an :class:`ElementTree`. Check the documentation to be sure.
+
+As an :class:`Element`, ``root`` has a tag and a dictionary of attributes::
+
+ >>> root.tag
+ 'data'
+ >>> root.attrib
+ {}
+
+It also has children nodes over which we can iterate::
+
+ >>> for child in root:
+ ... print(child.tag, child.attrib)
+ ...
+ country {'name': 'Liechtenstein'}
+ country {'name': 'Singapore'}
+ country {'name': 'Panama'}
+
+Children are nested, and we can access specific child nodes by index::
+
+ >>> root[0][1].text
+ '2008'
+
+Finding interesting elements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:class:`Element` has some useful methods that help iterate recursively over all
+the sub-tree below it (its children, their children, and so on). For example,
+:meth:`Element.iter`::
+
+ >>> for neighbor in root.iter('neighbor'):
+ ... print(neighbor.attrib)
+ ...
+ {'name': 'Austria', 'direction': 'E'}
+ {'name': 'Switzerland', 'direction': 'W'}
+ {'name': 'Malaysia', 'direction': 'N'}
+ {'name': 'Costa Rica', 'direction': 'W'}
+ {'name': 'Colombia', 'direction': 'E'}
+
+:meth:`Element.findall` finds only elements with a tag which are direct
+children of the current element. :meth:`Element.find` finds the *first* child
+with a particular tag, and :attr:`Element.text` accesses the element's text
+content. :meth:`Element.get` accesses the element's attributes::
+
+ >>> for country in root.findall('country'):
+ ... rank = country.find('rank').text
+ ... name = country.get('name')
+ ... print(name, rank)
+ ...
+ Liechtenstein 1
+ Singapore 4
+ Panama 68
+
+More sophisticated specification of which elements to look for is possible by
+using :ref:`XPath <elementtree-xpath>`.
+
+Modifying an XML File
+^^^^^^^^^^^^^^^^^^^^^
+
+:class:`ElementTree` provides a simple way to build XML documents and write them to files.
+The :meth:`ElementTree.write` method serves this purpose.
+
+Once created, an :class:`Element` object may be manipulated by directly changing
+its fields (such as :attr:`Element.text`), adding and modifying attributes
+(:meth:`Element.set` method), as well as adding new children (for example
+with :meth:`Element.append`).
+
+Let's say we want to add one to each country's rank, and add an ``updated``
+attribute to the rank element::
+
+ >>> for rank in root.iter('rank'):
+ ... new_rank = int(rank.text) + 1
+ ... rank.text = str(new_rank)
+ ... rank.set('updated', 'yes')
+ ...
+ >>> tree.write('output.xml')
+
+Our XML now looks like this:
+
+.. code-block:: xml
+
+ <?xml version="1.0"?>
+ <data>
+ <country name="Liechtenstein">
+ <rank updated="yes">2</rank>
+ <year>2008</year>
+ <gdppc>141100</gdppc>
+ <neighbor name="Austria" direction="E"/>
+ <neighbor name="Switzerland" direction="W"/>
+ </country>
+ <country name="Singapore">
+ <rank updated="yes">5</rank>
+ <year>2011</year>
+ <gdppc>59900</gdppc>
+ <neighbor name="Malaysia" direction="N"/>
+ </country>
+ <country name="Panama">
+ <rank updated="yes">69</rank>
+ <year>2011</year>
+ <gdppc>13600</gdppc>
+ <neighbor name="Costa Rica" direction="W"/>
+ <neighbor name="Colombia" direction="E"/>
+ </country>
+ </data>
+
+We can remove elements using :meth:`Element.remove`. Let's say we want to
+remove all countries with a rank higher than 50::
+
+ >>> for country in root.findall('country'):
+ ... rank = int(country.find('rank').text)
+ ... if rank > 50:
+ ... root.remove(country)
+ ...
+ >>> tree.write('output.xml')
+
+Our XML now looks like this:
+
+.. code-block:: xml
+
+ <?xml version="1.0"?>
+ <data>
+ <country name="Liechtenstein">
+ <rank updated="yes">2</rank>
+ <year>2008</year>
+ <gdppc>141100</gdppc>
+ <neighbor name="Austria" direction="E"/>
+ <neighbor name="Switzerland" direction="W"/>
+ </country>
+ <country name="Singapore">
+ <rank updated="yes">5</rank>
+ <year>2011</year>
+ <gdppc>59900</gdppc>
+ <neighbor name="Malaysia" direction="N"/>
+ </country>
+ </data>
+
+Building XML documents
+^^^^^^^^^^^^^^^^^^^^^^
+
+The :func:`SubElement` function also provides a convenient way to create new
+sub-elements for a given element::
+
+ >>> a = ET.Element('a')
+ >>> b = ET.SubElement(a, 'b')
+ >>> c = ET.SubElement(a, 'c')
+ >>> d = ET.SubElement(c, 'd')
+ >>> ET.dump(a)
+ <a><b /><c><d /></c></a>
+
+Additional resources
+^^^^^^^^^^^^^^^^^^^^
---------------
-
-The :class:`Element` type is a flexible container object, designed to store
-hierarchical data structures in memory. The type can be described as a cross
-between a list and a dictionary.
-
-Each element has a number of properties associated with it:
-
-* a tag which is a string identifying what kind of data this element represents
- (the element type, in other words).
-
-* a number of attributes, stored in a Python dictionary.
-
-* a text string.
-
-* an optional tail string.
-
-* a number of child elements, stored in a Python sequence
-
-To create an element instance, use the :class:`Element` constructor or the
-:func:`SubElement` factory function.
-
-The :class:`ElementTree` class can be used to wrap an element structure, and
-convert it from and to XML.
+See http://effbot.org/zone/element-index.htm for tutorials and links to other
+docs.
-A C implementation of this API is available as :mod:`xml.etree.cElementTree`.
-See http://effbot.org/zone/element-index.htm for tutorials and links to other
-docs. Fredrik Lundh's page is also the location of the development version of
-the xml.etree.ElementTree.
+.. _elementtree-xpath:
-.. versionchanged:: 3.2
- The ElementTree API is updated to 1.3. For more information, see
- `Introducing ElementTree 1.3
- <http://effbot.org/zone/elementtree-13-intro.htm>`_.
+XPath support
+-------------
+This module provides limited support for
+`XPath expressions <http://www.w3.org/TR/xpath>`_ for locating elements in a
+tree. The goal is to support a small subset of the abbreviated syntax; a full
+XPath engine is outside the scope of the module.
+
+Example
+^^^^^^^
+
+Here's an example that demonstrates some of the XPath capabilities of the
+module. We'll be using the ``countrydata`` XML document from the
+:ref:`Parsing XML <elementtree-parsing-xml>` section::
+
+ import xml.etree.ElementTree as ET
+
+ root = ET.fromstring(countrydata)
+
+ # Top-level elements
+ root.findall(".")
+
+ # All 'neighbor' grand-children of 'country' children of the top-level
+ # elements
+ root.findall("./country/neighbor")
+
+ # Nodes with name='Singapore' that have a 'year' child
+ root.findall(".//year/..[@name='Singapore']")
+
+ # 'year' nodes that are children of nodes with name='Singapore'
+ root.findall(".//*[@name='Singapore']/year")
+
+ # All 'neighbor' nodes that are the second child of their parent
+ root.findall(".//neighbor[2]")
+
+Supported XPath syntax
+^^^^^^^^^^^^^^^^^^^^^^
+
++-----------------------+------------------------------------------------------+
+| Syntax | Meaning |
++=======================+======================================================+
+| ``tag`` | Selects all child elements with the given tag. |
+| | For example, ``spam`` selects all child elements |
+| | named ``spam``, ``spam/egg`` selects all |
+| | grandchildren named ``egg`` in all children named |
+| | ``spam``. |
++-----------------------+------------------------------------------------------+
+| ``*`` | Selects all child elements. For example, ``*/egg`` |
+| | selects all grandchildren named ``egg``. |
++-----------------------+------------------------------------------------------+
+| ``.`` | Selects the current node. This is mostly useful |
+| | at the beginning of the path, to indicate that it's |
+| | a relative path. |
++-----------------------+------------------------------------------------------+
+| ``//`` | Selects all subelements, on all levels beneath the |
+| | current element. For example, ``.//egg`` selects |
+| | all ``egg`` elements in the entire tree. |
++-----------------------+------------------------------------------------------+
+| ``..`` | Selects the parent element. Returns ``None`` if the |
+| | path attempts to reach the ancestors of the start |
+| | element (the element ``find`` was called on). |
++-----------------------+------------------------------------------------------+
+| ``[@attrib]`` | Selects all elements that have the given attribute. |
++-----------------------+------------------------------------------------------+
+| ``[@attrib='value']`` | Selects all elements for which the given attribute |
+| | has the given value. The value cannot contain |
+| | quotes. |
++-----------------------+------------------------------------------------------+
+| ``[tag]`` | Selects all elements that have a child named |
+| | ``tag``. Only immediate children are supported. |
++-----------------------+------------------------------------------------------+
+| ``[position]`` | Selects all elements that are located at the given |
+| | position. The position can be either an integer |
+| | (1 is the first position), the expression ``last()`` |
+| | (for the last position), or a position relative to |
+| | the last position (e.g. ``last()-1``). |
++-----------------------+------------------------------------------------------+
+
+Predicates (expressions within square brackets) must be preceded by a tag
+name, an asterisk, or another predicate. ``position`` predicates must be
+preceded by a tag name.
+
+Reference
+---------
.. _elementtree-functions:
Functions
----------
+^^^^^^^^^
.. function:: Comment(text=None)
@@ -159,9 +433,9 @@ Functions
Generates a string representation of an XML element, including all
subelements. *element* is an :class:`Element` instance. *encoding* [1]_ is
the output encoding (default is US-ASCII). Use ``encoding="unicode"`` to
- generate a Unicode string. *method* is either ``"xml"``,
- ``"html"`` or ``"text"`` (default is ``"xml"``). Returns an (optionally)
- encoded string containing the XML data.
+ generate a Unicode string (otherwise, a bytestring is generated). *method*
+ is either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
+ Returns an (optionally) encoded string containing the XML data.
.. function:: tostringlist(element, encoding="us-ascii", method="xml")
@@ -169,11 +443,11 @@ Functions
Generates a string representation of an XML element, including all
subelements. *element* is an :class:`Element` instance. *encoding* [1]_ is
the output encoding (default is US-ASCII). Use ``encoding="unicode"`` to
- generate a Unicode string. *method* is either ``"xml"``,
- ``"html"`` or ``"text"`` (default is ``"xml"``). Returns a list of
- (optionally) encoded strings containing the XML data. It does not guarantee
- any specific sequence, except that ``"".join(tostringlist(element)) ==
- tostring(element)``.
+ generate a Unicode string (otherwise, a bytestring is generated). *method*
+ is either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
+ Returns a list of (optionally) encoded strings containing the XML data.
+ It does not guarantee any specific sequence, except that
+ ``"".join(tostringlist(element)) == tostring(element)``.
.. versionadded:: 3.2
@@ -198,8 +472,7 @@ Functions
.. _elementtree-element-objects:
Element Objects
----------------
-
+^^^^^^^^^^^^^^^
.. class:: Element(tag, attrib={}, **extra)
@@ -250,7 +523,7 @@ Element Objects
.. method:: clear()
Resets an element. This function removes all subelements, clears all
- attributes, and sets the text and tail attributes to None.
+ attributes, and sets the text and tail attributes to ``None``.
.. method:: get(key, default=None)
@@ -281,36 +554,43 @@ Element Objects
.. method:: append(subelement)
- Adds the element *subelement* to the end of this elements internal list
- of subelements.
+ Adds the element *subelement* to the end of this element's internal list
+ of subelements. Raises :exc:`TypeError` if *subelement* is not an
+ :class:`Element`.
.. method:: extend(subelements)
Appends *subelements* from a sequence object with zero or more elements.
- Raises :exc:`AssertionError` if a subelement is not a valid object.
+ Raises :exc:`TypeError` if a subelement is not an :class:`Element`.
.. versionadded:: 3.2
- .. method:: find(match)
+ .. method:: find(match, namespaces=None)
Finds the first subelement matching *match*. *match* may be a tag name
- or path. Returns an element instance or ``None``.
+ or a :ref:`path <elementtree-xpath>`. Returns an element instance
+ or ``None``. *namespaces* is an optional mapping from namespace prefix
+ to full name.
- .. method:: findall(match)
+ .. method:: findall(match, namespaces=None)
- Finds all matching subelements, by tag name or path. Returns a list
- containing all matching elements in document order.
+ Finds all matching subelements, by tag name or
+ :ref:`path <elementtree-xpath>`. Returns a list containing all matching
+ elements in document order. *namespaces* is an optional mapping from
+ namespace prefix to full name.
- .. method:: findtext(match, default=None)
+ .. method:: findtext(match, default=None, namespaces=None)
Finds text for the first subelement matching *match*. *match* may be
- a tag name or path. Returns the text content of the first matching
- element, or *default* if no element was found. Note that if the matching
- element has no text content an empty string is returned.
+ a tag name or a :ref:`path <elementtree-xpath>`. Returns the text content
+ of the first matching element, or *default* if no element was found.
+ Note that if the matching element has no text content an empty string
+ is returned. *namespaces* is an optional mapping from namespace prefix
+ to full name.
.. method:: getchildren()
@@ -325,9 +605,10 @@ Element Objects
Use method :meth:`Element.iter` instead.
- .. method:: insert(index, element)
+ .. method:: insert(index, subelement)
- Inserts a subelement at the given position in this element.
+ Inserts *subelement* at the given position in this element. Raises
+ :exc:`TypeError` if *subelement* is not an :class:`Element`.
.. method:: iter(tag=None)
@@ -341,10 +622,13 @@ Element Objects
.. versionadded:: 3.2
- .. method:: iterfind(match)
+ .. method:: iterfind(match, namespaces=None)
+
+ Finds all matching subelements, by tag name or
+ :ref:`path <elementtree-xpath>`. Returns an iterable yielding all
+ matching elements in document order. *namespaces* is an optional mapping
+ from namespace prefix to full name.
- Finds all matching subelements, by tag name or path. Returns an iterable
- yielding all matching elements in document order.
.. versionadded:: 3.2
@@ -389,7 +673,7 @@ Element Objects
.. _elementtree-elementtree-objects:
ElementTree Objects
--------------------
+^^^^^^^^^^^^^^^^^^^
.. class:: ElementTree(element=None, file=None)
@@ -409,28 +693,19 @@ ElementTree Objects
care. *element* is an element instance.
- .. method:: find(match)
+ .. method:: find(match, namespaces=None)
- Finds the first toplevel element matching *match*. *match* may be a tag
- name or path. Same as getroot().find(match). Returns the first matching
- element, or ``None`` if no element was found.
+ Same as :meth:`Element.find`, starting at the root of the tree.
- .. method:: findall(match)
+ .. method:: findall(match, namespaces=None)
- Finds all matching subelements, by tag name or path. Same as
- getroot().findall(match). *match* may be a tag name or path. Returns a
- list containing all matching elements, in document order.
+ Same as :meth:`Element.findall`, starting at the root of the tree.
- .. method:: findtext(match, default=None)
+ .. method:: findtext(match, default=None, namespaces=None)
- Finds the element text for the first toplevel element with given tag.
- Same as getroot().findtext(match). *match* may be a tag name or path.
- *default* is the value to return if the element was not found. Returns
- the text content of the first matching element, or the default value no
- element was found. Note that if the element is found, but has no text
- content, this method returns an empty string.
+ Same as :meth:`Element.findtext`, starting at the root of the tree.
.. method:: getiterator(tag=None)
@@ -451,11 +726,9 @@ ElementTree Objects
to look for (default is to return all elements)
- .. method:: iterfind(match)
+ .. method:: iterfind(match, namespaces=None)
- Finds all matching subelements, by tag name or path. Same as
- getroot().iterfind(match). Returns an iterable yielding all matching
- elements in document order.
+ Same as :meth:`Element.iterfind`, starting at the root of the tree.
.. versionadded:: 3.2
@@ -464,20 +737,29 @@ ElementTree Objects
Loads an external XML section into this element tree. *source* is a file
name or :term:`file object`. *parser* is an optional parser instance.
- If not given, the standard XMLParser parser is used. Returns the section
- root element.
+ If not given, the standard :class:`XMLParser` parser is used. Returns the
+ section root element.
- .. method:: write(file, encoding="us-ascii", xml_declaration=None, method="xml")
+ .. method:: write(file, encoding="us-ascii", xml_declaration=None, \
+ method="xml")
Writes the element tree to a file, as XML. *file* is a file name, or a
- :term:`file object` opened for writing. *encoding* [1]_ is the output encoding
- (default is US-ASCII). Use ``encoding="unicode"`` to write a Unicode string.
- *xml_declaration* controls if an XML declaration
- should be added to the file. Use False for never, True for always, None
- for only if not US-ASCII or UTF-8 or Unicode (default is None). *method* is
- either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
- Returns an (optionally) encoded string.
+ :term:`file object` opened for writing. *encoding* [1]_ is the output
+ encoding (default is US-ASCII).
+ *xml_declaration* controls if an XML declaration should be added to the
+ file. Use ``False`` for never, ``True`` for always, ``None``
+ for only if not US-ASCII or UTF-8 or Unicode (default is ``None``).
+ *method* is either ``"xml"``, ``"html"`` or ``"text"`` (default is
+ ``"xml"``).
+
+ The output is either a string (:class:`str`) or binary (:class:`bytes`).
+ This is controlled by the *encoding* argument. If *encoding* is
+ ``"unicode"``, the output is a string; otherwise, it's binary. Note that
+ this may conflict with the type of *file* if it's an open
+ :term:`file object`; make sure you do not try to write a string to a
+ binary stream and vice versa.
+
This is the XML file that is going to be manipulated::
@@ -510,7 +792,7 @@ Example of changing the attribute "target" of every link in first paragraph::
.. _elementtree-qname-objects:
QName Objects
--------------
+^^^^^^^^^^^^^
.. class:: QName(text_or_uri, tag=None)
@@ -526,7 +808,7 @@ QName Objects
.. _elementtree-treebuilder-objects:
TreeBuilder Objects
--------------------
+^^^^^^^^^^^^^^^^^^^
.. class:: TreeBuilder(element_factory=None)
@@ -534,9 +816,9 @@ TreeBuilder Objects
Generic element structure builder. This builder converts a sequence of
start, data, and end method calls to a well-formed element structure. You
can use this class to build an element structure using a custom XML parser,
- or a parser for some other XML-like format. The *element_factory* is called
- to create new :class:`Element` instances when given.
-
+ or a parser for some other XML-like format. *element_factory*, when given,
+ must be a callable accepting two positional arguments: a tag and
+ a dict of attributes. It is expected to return a new element instance.
.. method:: close()
@@ -577,7 +859,7 @@ TreeBuilder Objects
.. _elementtree-xmlparser-objects:
XMLParser Objects
------------------
+^^^^^^^^^^^^^^^^^
.. class:: XMLParser(html=0, target=None, encoding=None)
@@ -585,9 +867,9 @@ XMLParser Objects
:class:`Element` structure builder for XML source data, based on the expat
parser. *html* are predefined HTML entities. This flag is not supported by
the current implementation. *target* is the target object. If omitted, the
- builder uses an instance of the standard TreeBuilder class. *encoding* [1]_
- is optional. If given, the value overrides the encoding specified in the
- XML file.
+ builder uses an instance of the standard :class:`TreeBuilder` class.
+ *encoding* [1]_ is optional. If given, the value overrides the encoding
+ specified in the XML file.
.. method:: close()
@@ -645,6 +927,24 @@ This is an example of counting the maximum depth of an XML file::
>>> parser.close()
4
+Exceptions
+^^^^^^^^^^
+
+.. class:: ParseError
+
+ XML parse error, raised by the various parsing methods in this module when
+ parsing fails. The string representation of an instance of this exception
+ will contain a user-friendly error message. In addition, it will have
+ the following attributes available:
+
+ .. attribute:: code
+
+ A numeric error code from the expat parser. See the documentation of
+ :mod:`xml.parsers.expat` for the list of error codes and their meanings.
+
+ .. attribute:: position
+
+ A tuple of *line*, *column* numbers, specifying where the error occurred.
.. rubric:: Footnotes
diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst
new file mode 100644
index 0000000..21b2e23
--- /dev/null
+++ b/Doc/library/xml.rst
@@ -0,0 +1,29 @@
+.. _xml:
+
+XML Processing Modules
+======================
+
+Python's interfaces for processing XML are grouped in the ``xml`` package.
+
+It is important to note that modules in the :mod:`xml` package require that
+there be at least one SAX-compliant XML parser available. The Expat parser is
+included with Python, so the :mod:`xml.parsers.expat` module will always be
+available.
+
+The documentation for the :mod:`xml.dom` and :mod:`xml.sax` packages is the
+definition of the Python bindings for the DOM and SAX interfaces.
+
+The XML handling submodules are:
+
+* :mod:`xml.etree.ElementTree`: the ElementTree API, a simple and lightweight XML processor
+
+..
+
+* :mod:`xml.dom`: the DOM API definition
+* :mod:`xml.dom.minidom`: a lightweight DOM implementation
+* :mod:`xml.dom.pulldom`: support for building partial DOM trees
+
+..
+
+* :mod:`xml.sax`: SAX2 base classes and convenience functions
+* :mod:`xml.parsers.expat`: the Expat parser binding
diff --git a/Doc/library/xml.sax.utils.rst b/Doc/library/xml.sax.utils.rst
index ff36fd8..0a4038c 100644
--- a/Doc/library/xml.sax.utils.rst
+++ b/Doc/library/xml.sax.utils.rst
@@ -62,7 +62,7 @@ or as base classes.
tags, if set to *True* they are emitted as a single self-closed tag.
.. versionadded:: 3.2
- short_empty_elements
+ The *short_empty_elements* parameter.
.. class:: XMLFilterBase(base)
diff --git a/Doc/library/xmlrpc.client.rst b/Doc/library/xmlrpc.client.rst
index e72770a..1871c99 100644
--- a/Doc/library/xmlrpc.client.rst
+++ b/Doc/library/xmlrpc.client.rst
@@ -8,7 +8,7 @@
.. XXX Not everything is documented yet. It might be good to describe
- Marshaller, Unmarshaller, getparser, dumps, loads, and Transport.
+ Marshaller, Unmarshaller, getparser and Transport.
**Source code:** :source:`Lib/xmlrpc/client.py`
@@ -21,7 +21,12 @@ supports writing XML-RPC client code; it handles all the details of translating
between conformable Python objects and XML on the wire.
-.. class:: ServerProxy(uri, transport=None, encoding=None, verbose=False, allow_none=False, use_datetime=False)
+.. class:: ServerProxy(uri, transport=None, encoding=None, verbose=False, \
+ allow_none=False, use_datetime=False, \
+ use_builtin_types=False)
+
+ .. versionchanged:: 3.3
+ The *use_builtin_types* flag was added.
A :class:`ServerProxy` instance is an object that manages communication with a
remote XML-RPC server. The required first argument is a URI (Uniform Resource
@@ -34,9 +39,13 @@ between conformable Python objects and XML on the wire.
XML; the default behaviour is for ``None`` to raise a :exc:`TypeError`. This is
a commonly-used extension to the XML-RPC specification, but isn't supported by
all clients and servers; see http://ontosys.com/xml-rpc/extensions.php for a
- description. The *use_datetime* flag can be used to cause date/time values to
- be presented as :class:`datetime.datetime` objects; this is false by default.
- :class:`datetime.datetime` objects may be passed to calls.
+ description. The *use_builtin_types* flag can be used to cause date/time values
+ to be presented as :class:`datetime.datetime` objects and binary data to be
+ presented as :class:`bytes` objects; this flag is false by default.
+ :class:`datetime.datetime` and :class:`bytes` objects may be passed to calls.
+
+ The obsolete *use_datetime* flag is similar to *use_builtin_types* but it
+ applies only to date/time values.
Both the HTTP and HTTPS transports support the URL syntax extension for HTTP
Basic Authentication: ``http://user:pass@host:port/path``. The ``user:pass``
@@ -78,12 +87,12 @@ between conformable Python objects and XML on the wire.
| | only their *__dict__* attribute is |
| | transmitted. |
+---------------------------------+---------------------------------------------+
- | :const:`dates` | in seconds since the epoch (pass in an |
- | | instance of the :class:`DateTime` class) or |
+ | :const:`dates` | In seconds since the epoch. Pass in an |
+ | | instance of the :class:`DateTime` class or |
| | a :class:`datetime.datetime` instance. |
+---------------------------------+---------------------------------------------+
- | :const:`binary data` | pass in an instance of the :class:`Binary` |
- | | wrapper class |
+ | :const:`binary data` | Pass in an instance of the :class:`Binary` |
+ | | wrapper class or a :class:`bytes` instance. |
+---------------------------------+---------------------------------------------+
This is the full set of data types supported by XML-RPC. Method calls may also
@@ -98,8 +107,9 @@ between conformable Python objects and XML on the wire.
ensure that the string is free of characters that aren't allowed in XML, such as
the control characters with ASCII values between 0 and 31 (except, of course,
tab, newline and carriage return); failing to do this will result in an XML-RPC
- request that isn't well-formed XML. If you have to pass arbitrary strings via
- XML-RPC, use the :class:`Binary` wrapper class described below.
+ request that isn't well-formed XML. If you have to pass arbitrary bytes
+ via XML-RPC, use the :class:`bytes` class or the :class:`Binary` wrapper class
+ described below.
:class:`Server` is retained as an alias for :class:`ServerProxy` for backwards
compatibility. New code should use :class:`ServerProxy`.
@@ -249,7 +259,7 @@ The client code for the preceding server::
Binary Objects
--------------
-This class may be initialized from string data (which may include NULs). The
+This class may be initialized from bytes data (which may include NULs). The
primary access to the content of a :class:`Binary` object is provided by an
attribute:
@@ -257,15 +267,15 @@ attribute:
.. attribute:: Binary.data
The binary data encapsulated by the :class:`Binary` instance. The data is
- provided as an 8-bit string.
+ provided as a :class:`bytes` object.
:class:`Binary` objects have the following methods, supported mainly for
internal use by the marshalling/unmarshalling code:
-.. method:: Binary.decode(string)
+.. method:: Binary.decode(bytes)
- Accept a base64 string and decode it as the instance's new data.
+ Accept a base64 :class:`bytes` object and decode it as the instance's new data.
.. method:: Binary.encode(out)
@@ -471,14 +481,21 @@ Convenience Functions
it via an extension, provide a true value for *allow_none*.
-.. function:: loads(data, use_datetime=False)
+.. function:: loads(data, use_datetime=False, use_builtin_types=False)
Convert an XML-RPC request or response into Python objects, a ``(params,
 methodname)``. *params* is a tuple of arguments; *methodname* is a string, or
``None`` if no method name is present in the packet. If the XML-RPC packet
represents a fault condition, this function will raise a :exc:`Fault` exception.
- The *use_datetime* flag can be used to cause date/time values to be presented as
- :class:`datetime.datetime` objects; this is false by default.
+ The *use_builtin_types* flag can be used to cause date/time values to be
+ presented as :class:`datetime.datetime` objects and binary data to be
+ presented as :class:`bytes` objects; this flag is false by default.
+
+ The obsolete *use_datetime* flag is similar to *use_builtin_types* but it
+ applies only to date/time values.
+
+ .. versionchanged:: 3.3
+ The *use_builtin_types* flag was added.
.. _xmlrpc-client-example:
diff --git a/Doc/library/xmlrpc.rst b/Doc/library/xmlrpc.rst
new file mode 100644
index 0000000..ae68157
--- /dev/null
+++ b/Doc/library/xmlrpc.rst
@@ -0,0 +1,12 @@
+:mod:`xmlrpc` --- XMLRPC server and client modules
+==================================================
+
+XML-RPC is a Remote Procedure Call method that uses XML passed via HTTP as a
+transport. With it, a client can call methods with parameters on a remote
+server (the server is named by a URI) and get back structured data.
+
+``xmlrpc`` is a package that collects server and client modules implementing
+XML-RPC. The modules are:
+
+* :mod:`xmlrpc.client`
+* :mod:`xmlrpc.server`
diff --git a/Doc/library/xmlrpc.server.rst b/Doc/library/xmlrpc.server.rst
index 67feba6..6493fd4 100644
--- a/Doc/library/xmlrpc.server.rst
+++ b/Doc/library/xmlrpc.server.rst
@@ -16,7 +16,9 @@ servers written in Python. Servers can either be free standing, using
:class:`CGIXMLRPCRequestHandler`.
-.. class:: SimpleXMLRPCServer(addr, requestHandler=SimpleXMLRPCRequestHandler, logRequests=True, allow_none=False, encoding=None, bind_and_activate=True)
+.. class:: SimpleXMLRPCServer(addr, requestHandler=SimpleXMLRPCRequestHandler,\
+ logRequests=True, allow_none=False, encoding=None,\
+ bind_and_activate=True, use_builtin_types=False)
Create a new server instance. This class provides methods for registration of
functions that can be called by the XML-RPC protocol. The *requestHandler*
@@ -25,18 +27,31 @@ servers written in Python. Servers can either be free standing, using
are passed to the :class:`socketserver.TCPServer` constructor. If *logRequests*
is true (the default), requests will be logged; setting this parameter to false
will turn off logging. The *allow_none* and *encoding* parameters are passed
- on to :mod:`xmlrpc.client` and control the XML-RPC responses that will be returned
+ on to :mod:`xmlrpc.client` and control the XML-RPC responses that will be returned
from the server. The *bind_and_activate* parameter controls whether
:meth:`server_bind` and :meth:`server_activate` are called immediately by the
constructor; it defaults to true. Setting it to false allows code to manipulate
the *allow_reuse_address* class variable before the address is bound.
+ The *use_builtin_types* parameter is passed to the
+ :func:`~xmlrpc.client.loads` function and controls which types are processed
+ when date/time values or binary data are received; it defaults to false.
+ .. versionchanged:: 3.3
+ The *use_builtin_types* flag was added.
-.. class:: CGIXMLRPCRequestHandler(allow_none=False, encoding=None)
+
+.. class:: CGIXMLRPCRequestHandler(allow_none=False, encoding=None,\
+ use_builtin_types=False)
Create a new instance to handle XML-RPC requests in a CGI environment. The
*allow_none* and *encoding* parameters are passed on to :mod:`xmlrpc.client`
and control the XML-RPC responses that will be returned from the server.
+ The *use_builtin_types* parameter is passed to the
+ :func:`~xmlrpc.client.loads` function and controls which types are processed
+ when date/time values or binary data are received; it defaults to false.
+
+ .. versionchanged:: 3.3
+ The *use_builtin_types* flag was added.
.. class:: SimpleXMLRPCRequestHandler()
@@ -233,12 +248,17 @@ to HTTP GET requests. Servers can either be free standing, using
:class:`DocCGIXMLRPCRequestHandler`.
-.. class:: DocXMLRPCServer(addr, requestHandler=DocXMLRPCRequestHandler, logRequests=True, allow_none=False, encoding=None, bind_and_activate=True)
+.. class:: DocXMLRPCServer(addr, requestHandler=DocXMLRPCRequestHandler,\
+ logRequests=True, allow_none=False, encoding=None,\
+ bind_and_activate=True, use_builtin_types=False)
Create a new server instance. All parameters have the same meaning as for
:class:`SimpleXMLRPCServer`; *requestHandler* defaults to
:class:`DocXMLRPCRequestHandler`.
+ .. versionchanged:: 3.3
+ The *use_builtin_types* flag was added.
+
.. class:: DocCGIXMLRPCRequestHandler()
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index 264cd47..9f6e077 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -87,7 +87,30 @@ The module defines the following items:
.. data:: ZIP_DEFLATED
The numeric constant for the usual ZIP compression method. This requires the
- :mod:`zlib` module. No other compression methods are currently supported.
+ :mod:`zlib` module.
+
+
+.. data:: ZIP_BZIP2
+
+ The numeric constant for the BZIP2 compression method. This requires the
+ :mod:`bz2` module.
+
+ .. versionadded:: 3.3
+
+.. data:: ZIP_LZMA
+
+ The numeric constant for the LZMA compression method. This requires the
+ :mod:`lzma` module.
+
+ .. versionadded:: 3.3
+
+ .. note::
+
+ The ZIP file format specification has included support for bzip2 compression
+ since 2001, and for LZMA compression since 2006. However, some tools
+ (including older Python releases) do not support these compression
+ methods, and may either refuse to process the ZIP file altogether,
+ or fail to extract individual files.
.. seealso::
@@ -118,9 +141,11 @@ ZipFile Objects
adding a ZIP archive to another file (such as :file:`python.exe`). If
*mode* is ``a`` and the file does not exist at all, it is created.
*compression* is the ZIP compression method to use when writing the archive,
- and should be :const:`ZIP_STORED` or :const:`ZIP_DEFLATED`; unrecognized
- values will cause :exc:`RuntimeError` to be raised. If :const:`ZIP_DEFLATED`
- is specified but the :mod:`zlib` module is not available, :exc:`RuntimeError`
+ and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`,
+ :const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized
+ values will cause :exc:`RuntimeError` to be raised. If :const:`ZIP_DEFLATED`,
+ :const:`ZIP_BZIP2` or :const:`ZIP_LZMA` is specified but the corresponding module
+ (:mod:`zlib`, :mod:`bz2` or :mod:`lzma`) is not available, :exc:`RuntimeError`
is also raised. The default is :const:`ZIP_STORED`. If *allowZip64* is
``True`` zipfile will create ZIP files that use the ZIP64 extensions when
the zipfile is larger than 2 GB. If it is false (the default) :mod:`zipfile`
@@ -143,6 +168,9 @@ ZipFile Objects
.. versionadded:: 3.2
Added the ability to use :class:`ZipFile` as a context manager.
+ .. versionchanged:: 3.3
+ Added support for :mod:`bzip2 <bz2>` and :mod:`lzma` compression.
+
.. method:: ZipFile.close()
diff --git a/Doc/library/zipimport.rst b/Doc/library/zipimport.rst
index 4f17092..b47c35b 100644
--- a/Doc/library/zipimport.rst
+++ b/Doc/library/zipimport.rst
@@ -85,9 +85,12 @@ zipimporter Objects
.. method:: get_data(pathname)
- Return the data associated with *pathname*. Raise :exc:`IOError` if the
+ Return the data associated with *pathname*. Raise :exc:`OSError` if the
file wasn't found.
+ .. versionchanged:: 3.3
+ :exc:`IOError` used to be raised instead of :exc:`OSError`.
+
.. method:: get_filename(fullname)
diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst
index 897d919..42535a0 100644
--- a/Doc/library/zlib.rst
+++ b/Doc/library/zlib.rst
@@ -58,12 +58,37 @@ The available exception and functions in this module are:
exception if any error occurs.
-.. function:: compressobj([level])
+.. function:: compressobj(level=-1, method=DEFLATED, wbits=15, memlevel=8, strategy=Z_DEFAULT_STRATEGY[, zdict])
Returns a compression object, to be used for compressing data streams that won't
- fit into memory at once. *level* is an integer from ``1`` to ``9`` controlling
- the level of compression; ``1`` is fastest and produces the least compression,
- ``9`` is slowest and produces the most. The default value is ``6``.
+ fit into memory at once.
+
+ *level* is the compression level -- an integer from ``1`` to ``9``. A value
+ of ``1`` is fastest and produces the least compression, while a value of
+ ``9`` is slowest and produces the most. The default value is ``6``.
+
+ *method* is the compression algorithm. Currently, the only supported value is
+ ``DEFLATED``.
+
+ *wbits* is the base two logarithm of the size of the window buffer. This
+ should be an integer from ``8`` to ``15``. Higher values give better
+ compression, but use more memory.
+
+ *memlevel* controls the amount of memory used for internal compression state.
+ Valid values range from ``1`` to ``9``. Higher values use more memory,
+ but are faster and produce smaller output.
+
+ *strategy* is used to tune the compression algorithm. Possible values are
+ ``Z_DEFAULT_STRATEGY``, ``Z_FILTERED``, and ``Z_HUFFMAN_ONLY``.
+
+ *zdict* is a predefined compression dictionary. This is a sequence of bytes
+ (such as a :class:`bytes` object) containing subsequences that are expected
+ to occur frequently in the data that is to be compressed. Those subsequences
+ that are expected to be most common should come at the end of the dictionary.
+
+ .. versionchanged:: 3.3
+ Added the *method*, *wbits*, *memlevel*, *strategy* and *zdict*
+ parameters.
.. function:: crc32(data[, value])
@@ -82,12 +107,13 @@ The available exception and functions in this module are:
Always returns an unsigned 32-bit integer.
-.. note::
- To generate the same numeric value across all Python versions and
- platforms use crc32(data) & 0xffffffff. If you are only using
- the checksum in packed binary format this is not necessary as the
- return value is the correct 32bit binary representation
- regardless of sign.
+ .. note::
+
+ To generate the same numeric value across all Python versions and
+ platforms, use ``crc32(data) & 0xffffffff``. If you are only using
+ the checksum in packed binary format this is not necessary as the
+ return value is the correct 32-bit binary representation
+ regardless of sign.
.. function:: decompress(data[, wbits[, bufsize]])
@@ -114,11 +140,26 @@ The available exception and functions in this module are:
to :c:func:`malloc`. The default size is 16384.
-.. function:: decompressobj([wbits])
+.. function:: decompressobj(wbits=15[, zdict])
Returns a decompression object, to be used for decompressing data streams that
- won't fit into memory at once. The *wbits* parameter controls the size of the
- window buffer.
+ won't fit into memory at once.
+
+ The *wbits* parameter controls the size of the window buffer.
+
+ The *zdict* parameter specifies a predefined compression dictionary. If
+ provided, this must be the same dictionary as was used by the compressor that
+ produced the data that is to be decompressed.
+
+ .. note::
+
+ If *zdict* is a mutable object (such as a :class:`bytearray`), you must not
+ modify its contents between the call to :func:`decompressobj` and the first
+ call to the decompressor's ``decompress()`` method.
+
+ .. versionchanged:: 3.3
+ Added the *zdict* parameter.
+
Compression objects support the following methods:
@@ -150,7 +191,7 @@ Compression objects support the following methods:
compress a set of data that share a common initial prefix.
-Decompression objects support the following methods, and two attributes:
+Decompression objects support the following methods and attributes:
.. attribute:: Decompress.unused_data
@@ -160,13 +201,6 @@ Decompression objects support the following methods, and two attributes:
available. If the whole bytestring turned out to contain compressed data, this is
``b""``, an empty bytes object.
- The only way to determine where a bytestring of compressed data ends is by actually
- decompressing it. This means that when compressed data is contained part of a
- larger file, you can only find the end of it by reading data and feeding it
- followed by some non-empty bytestring into a decompression object's
- :meth:`decompress` method until the :attr:`unused_data` attribute is no longer
- empty.
-
.. attribute:: Decompress.unconsumed_tail
@@ -177,6 +211,17 @@ Decompression objects support the following methods, and two attributes:
:meth:`decompress` method call in order to get correct output.
+.. attribute:: Decompress.eof
+
+ A boolean indicating whether the end of the compressed data stream has been
+ reached.
+
+ This makes it possible to distinguish between a properly-formed compressed
+ stream, and an incomplete or truncated one.
+
+ .. versionadded:: 3.3
+
+
.. method:: Decompress.decompress(data[, max_length])
Decompress *data*, returning a bytes object containing the uncompressed data
@@ -211,6 +256,24 @@ Decompression objects support the following methods, and two attributes:
seeks into the stream at a future point.
+Information about the version of the zlib library in use is available through
+the following constants:
+
+
+.. data:: ZLIB_VERSION
+
+ The version string of the zlib library that was used for building the module.
+ This may be different from the zlib library actually used at runtime, which
+ is available as :const:`ZLIB_RUNTIME_VERSION`.
+
+
+.. data:: ZLIB_RUNTIME_VERSION
+
+ The version string of the zlib library actually loaded by the interpreter.
+
+ .. versionadded:: 3.3
+
+
.. seealso::
Module :mod:`gzip`
diff --git a/Doc/license.rst b/Doc/license.rst
index a44fa04..6326ce4 100644
--- a/Doc/license.rst
+++ b/Doc/license.rst
@@ -120,6 +120,8 @@ been GPL-compatible; the table below summarizes the various releases.
+----------------+--------------+------------+------------+-----------------+
| 3.2.3 | 3.2.2 | 2012 | PSF | yes |
+----------------+--------------+------------+------------+-----------------+
+| 3.3.0 | 3.2 | 2012 | PSF | yes |
++----------------+--------------+------------+------------+-----------------+
.. note::
diff --git a/Doc/make.bat b/Doc/make.bat
index 4ea2d51..a2220c0 100644
--- a/Doc/make.bat
+++ b/Doc/make.bat
@@ -37,7 +37,7 @@ goto end
svn co %SVNROOT%/external/Sphinx-1.0.7/sphinx tools/sphinx
svn co %SVNROOT%/external/docutils-0.6/docutils tools/docutils
svn co %SVNROOT%/external/Jinja-2.3.1/jinja2 tools/jinja2
-svn co %SVNROOT%/external/Pygments-1.3.1/pygments tools/pygments
+svn co %SVNROOT%/external/Pygments-1.5dev-20120930/pygments tools/pygments
goto end
:update
diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst
index 322e8c8..d093383 100644
--- a/Doc/reference/datamodel.rst
+++ b/Doc/reference/datamodel.rst
@@ -35,12 +35,19 @@ represented by objects.)
Every object has an identity, a type and a value. An object's *identity* never
changes once it has been created; you may think of it as the object's address in
memory. The ':keyword:`is`' operator compares the identity of two objects; the
-:func:`id` function returns an integer representing its identity (currently
-implemented as its address). An object's :dfn:`type` is also unchangeable. [#]_
+:func:`id` function returns an integer representing its identity.
+
+.. impl-detail::
+
+ For CPython, ``id(x)`` is the memory address where ``x`` is stored.
+
An object's type determines the operations that the object supports (e.g., "does
it have a length?") and also defines the possible values for objects of that
type. The :func:`type` function returns an object's type (which is an object
-itself). The *value* of some objects can change. Objects whose value can
+itself). Like its identity, an object's :dfn:`type` is also unchangeable.
+[#]_
+
+The *value* of some objects can change. Objects whose value can
change are said to be *mutable*; objects whose value is unchangeable once they
are created are called *immutable*. (The value of an immutable container object
that contains a reference to a mutable object can change when the latter's value
@@ -276,16 +283,16 @@ Sequences
single: integer
single: Unicode
- The items of a string object are Unicode code units. A Unicode code
- unit is represented by a string object of one item and can hold either
- a 16-bit or 32-bit value representing a Unicode ordinal (the maximum
- value for the ordinal is given in ``sys.maxunicode``, and depends on
- how Python is configured at compile time). Surrogate pairs may be
- present in the Unicode object, and will be reported as two separate
- items. The built-in functions :func:`chr` and :func:`ord` convert
- between code units and nonnegative integers representing the Unicode
- ordinals as defined in the Unicode Standard 3.0. Conversion from and to
- other encodings are possible through the string method :meth:`encode`.
+ A string is a sequence of values that represent Unicode codepoints.
+ All the codepoints in range ``U+0000 - U+10FFFF`` can be represented
+ in a string. Python doesn't have a :c:type:`char` type, and
+ every character in the string is represented as a string object
+ with length ``1``. The built-in function :func:`ord` converts a
+ character to its codepoint (as an integer); :func:`chr` converts
+ an integer in range ``0 - 10FFFF`` to the corresponding character.
+ :meth:`str.encode` can be used to convert a :class:`str` to
+ :class:`bytes` using the given encoding, and :meth:`bytes.decode` can
+ be used to achieve the opposite.
Tuples
.. index::
@@ -448,6 +455,11 @@ Callable types
+-------------------------+-------------------------------+-----------+
| :attr:`__name__` | The function's name | Writable |
+-------------------------+-------------------------------+-----------+
+ | :attr:`__qualname__` | The function's | Writable |
+ | | :term:`qualified name` | |
+ | | | |
+ | | .. versionadded:: 3.3 | |
+ +-------------------------+-------------------------------+-----------+
| :attr:`__module__` | The name of the module the | Writable |
| | function was defined in, or | |
| | ``None`` if unavailable. | |
@@ -639,17 +651,20 @@ Modules
statement: import
object: module
- Modules are imported by the :keyword:`import` statement (see section
- :ref:`import`). A module object has a
- namespace implemented by a dictionary object (this is the dictionary referenced
- by the __globals__ attribute of functions defined in the module). Attribute
- references are translated to lookups in this dictionary, e.g., ``m.x`` is
- equivalent to ``m.__dict__["x"]``. A module object does not contain the code
- object used to initialize the module (since it isn't needed once the
- initialization is done).
-
- Attribute assignment updates the module's namespace dictionary, e.g., ``m.x =
- 1`` is equivalent to ``m.__dict__["x"] = 1``.
+ Modules are a basic organizational unit of Python code, and are created by
+ the :ref:`import system <importsystem>` as invoked either by the
+ :keyword:`import` statement (see :keyword:`import`), or by calling
+ functions such as :func:`importlib.import_module` and built-in
+ :func:`__import__`. A module object has a namespace implemented by a
+ dictionary object (this is the dictionary referenced by the ``__globals__``
+ attribute of functions defined in the module). Attribute references are
+ translated to lookups in this dictionary, e.g., ``m.x`` is equivalent to
+ ``m.__dict__["x"]``. A module object does not contain the code object used
+ to initialize the module (since it isn't needed once the initialization is
+ done).
+
+ Attribute assignment updates the module's namespace dictionary, e.g.,
+ ``m.x = 1`` is equivalent to ``m.__dict__["x"] = 1``.
.. index:: single: __dict__ (module attribute)
@@ -671,11 +686,12 @@ Modules
Predefined (writable) attributes: :attr:`__name__` is the module's name;
:attr:`__doc__` is the module's documentation string, or ``None`` if
- unavailable; :attr:`__file__` is the pathname of the file from which the module
- was loaded, if it was loaded from a file. The :attr:`__file__` attribute is not
- present for C modules that are statically linked into the interpreter; for
- extension modules loaded dynamically from a shared library, it is the pathname
- of the shared library file.
+ unavailable; :attr:`__file__` is the pathname of the file from which the
+ module was loaded, if it was loaded from a file. The :attr:`__file__`
+ attribute may be missing for certain types of modules, such as C modules
+ that are statically linked into the interpreter; for extension modules
+ loaded dynamically from a shared library, it is the pathname of the shared
+ library file.
Custom classes
Custom class types are typically created by class definitions (see section
@@ -1250,10 +1266,10 @@ Basic customization
immutable (if the object's hash value changes, it will be in the wrong hash
bucket).
-
User-defined classes have :meth:`__eq__` and :meth:`__hash__` methods
by default; with them, all objects compare unequal (except with themselves)
- and ``x.__hash__()`` returns ``id(x)``.
+ and ``x.__hash__()`` returns an appropriate value such that ``x == y``
+ implies both that ``x is y`` and ``hash(x) == hash(y)``.
A class that overrides :meth:`__eq__` and does not define :meth:`__hash__`
will have its :meth:`__hash__` implicitly set to ``None``. When the
@@ -1272,7 +1288,27 @@ Basic customization
a :exc:`TypeError` would be incorrectly identified as hashable by
an ``isinstance(obj, collections.Hashable)`` call.
- See also the :option:`-R` command-line option.
+
+ .. note::
+
+ By default, the :meth:`__hash__` values of str, bytes and datetime
+ objects are "salted" with an unpredictable random value. Although they
+ remain constant within an individual Python process, they are not
+ predictable between repeated invocations of Python.
+
+ This is intended to provide protection against a denial-of-service caused
+ by carefully-chosen inputs that exploit the worst case performance of a
+ dict insertion, O(n^2) complexity. See
+ http://www.ocert.org/advisories/ocert-2011-003.html for details.
+
+ Changing hash values affects the iteration order of dicts, sets and
+ other mappings. Python has never made guarantees about this ordering
+ (and it typically varies between 32-bit and 64-bit builds).
+
+ See also :envvar:`PYTHONHASHSEED`.
+
+ .. versionchanged:: 3.3
+ Hash randomization is enabled by default.
.. method:: object.__bool__(self)
@@ -1353,7 +1389,8 @@ access (use of, assignment to, or deletion of ``x.name``) for class instances.
.. method:: object.__dir__(self)
- Called when :func:`dir` is called on the object. A list must be returned.
+ Called when :func:`dir` is called on the object. A sequence must be
+ returned. :func:`dir` converts the returned sequence to a list and sorts it.
.. _descriptors:
@@ -1524,53 +1561,115 @@ Notes on using *__slots__*
Customizing class creation
--------------------------
-By default, classes are constructed using :func:`type`. A class definition is
-read into a separate namespace and the value of class name is bound to the
-result of ``type(name, bases, dict)``.
+By default, classes are constructed using :func:`type`. The class body is
+executed in a new namespace and the class name is bound locally to the
+result of ``type(name, bases, namespace)``.
+
+The class creation process can be customised by passing the ``metaclass``
+keyword argument in the class definition line, or by inheriting from an
+existing class that included such an argument. In the following example,
+both ``MyClass`` and ``MySubclass`` are instances of ``Meta``::
+
+ class Meta(type):
+ pass
+
+ class MyClass(metaclass=Meta):
+ pass
+
+ class MySubclass(MyClass):
+ pass
-When the class definition is read, if a callable ``metaclass`` keyword argument
-is passed after the bases in the class definition, the callable given will be
-called instead of :func:`type`. If other keyword arguments are passed, they
-will also be passed to the metaclass. This allows classes or functions to be
-written which monitor or alter the class creation process:
+Any other keyword arguments that are specified in the class definition are
+passed through to all metaclass operations described below.
-* Modifying the class dictionary prior to the class being created.
+When a class definition is executed, the following steps occur:
-* Returning an instance of another class -- essentially performing the role of a
- factory function.
+* the appropriate metaclass is determined
+* the class namespace is prepared
+* the class body is executed
+* the class object is created
-These steps will have to be performed in the metaclass's :meth:`__new__` method
--- :meth:`type.__new__` can then be called from this method to create a class
-with different properties. This example adds a new element to the class
-dictionary before creating the class::
+Determining the appropriate metaclass
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- class metacls(type):
- def __new__(mcs, name, bases, dict):
- dict['foo'] = 'metacls was here'
- return type.__new__(mcs, name, bases, dict)
+The appropriate metaclass for a class definition is determined as follows:
-You can of course also override other class methods (or add new methods); for
-example defining a custom :meth:`__call__` method in the metaclass allows custom
-behavior when the class is called, e.g. not always creating a new instance.
+* if no bases and no explicit metaclass are given, then :func:`type` is used
+* if an explicit metaclass is given and it is *not* an instance of
+ :func:`type`, then it is used directly as the metaclass
+* if an instance of :func:`type` is given as the explicit metaclass, or
+ bases are defined, then the most derived metaclass is used
-If the metaclass has a :meth:`__prepare__` attribute (usually implemented as a
-class or static method), it is called before the class body is evaluated with
-the name of the class and a tuple of its bases for arguments. It should return
-an object that supports the mapping interface that will be used to store the
-namespace of the class. The default is a plain dictionary. This could be used,
-for example, to keep track of the order that class attributes are declared in by
-returning an ordered dictionary.
+The most derived metaclass is selected from the explicitly specified
+metaclass (if any) and the metaclasses (i.e. ``type(cls)``) of all specified
+base classes. The most derived metaclass is one which is a subtype of *all*
+of these candidate metaclasses. If none of the candidate metaclasses meets
+that criterion, then the class definition will fail with ``TypeError``.
-The appropriate metaclass is determined by the following precedence rules:
-* If the ``metaclass`` keyword argument is passed with the bases, it is used.
+Preparing the class namespace
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Once the appropriate metaclass has been identified, then the class namespace
+is prepared. If the metaclass has a ``__prepare__`` attribute, it is called
+as ``namespace = metaclass.__prepare__(name, bases, **kwds)`` (where the
+additional keyword arguments, if any, come from the class definition).
+
+If the metaclass has no ``__prepare__`` attribute, then the class namespace
+is initialised as an empty :func:`dict` instance.
+
+.. seealso::
+
+ :pep:`3115` - Metaclasses in Python 3000
+ Introduced the ``__prepare__`` namespace hook
+
+
+Executing the class body
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+The class body is executed (approximately) as
+``exec(body, globals(), namespace)``. The key difference from a normal
+call to :func:`exec` is that lexical scoping allows the class body (including
+any methods) to reference names from the current and outer scopes when the
+class definition occurs inside a function.
+
+However, even when the class definition occurs inside the function, methods
+defined inside the class still cannot see names defined at the class scope.
+Class variables must be accessed through the first parameter of instance or
+class methods, and cannot be accessed at all from static methods.
+
+
+Creating the class object
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Once the class namespace has been populated by executing the class body,
+the class object is created by calling
+``metaclass(name, bases, namespace, **kwds)`` (the additional keywords
+passed here are the same as those passed to ``__prepare__``).
+
+This class object is the one that will be referenced by the zero-argument
+form of :func:`super`. ``__class__`` is an implicit closure reference
+created by the compiler if any methods in a class body refer to either
+``__class__`` or ``super``. This allows the zero argument form of
+:func:`super` to correctly identify the class being defined based on
+lexical scoping, while the class or instance that was used to make the
+current call is identified based on the first argument passed to the method.
+
+After the class object is created, it is passed to the class decorators
+included in the class definition (if any) and the resulting object is bound
+in the local namespace as the defined class.
+
+.. seealso::
+
+ :pep:`3135` - New super
+ Describes the implicit ``__class__`` closure reference
-* Otherwise, if there is at least one base class, its metaclass is used.
-* Otherwise, the default metaclass (:class:`type`) is used.
+Metaclass example
+^^^^^^^^^^^^^^^^^
The potential uses for metaclasses are boundless. Some ideas that have been
-explored including logging, interface checking, automatic delegation, automatic
+explored include logging, interface checking, automatic delegation, automatic
property creation, proxies, frameworks, and automatic resource
locking/synchronization.
@@ -1583,9 +1682,9 @@ to remember the order that class members were defined::
def __prepare__(metacls, name, bases, **kwds):
return collections.OrderedDict()
- def __new__(cls, name, bases, classdict):
- result = type.__new__(cls, name, bases, dict(classdict))
- result.members = tuple(classdict)
+ def __new__(cls, name, bases, namespace, **kwds):
+ result = type.__new__(cls, name, bases, dict(namespace))
+ result.members = tuple(namespace)
return result
class A(metaclass=OrderedClass):
diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst
index 2c6acb6..41523cb 100644
--- a/Doc/reference/expressions.rst
+++ b/Doc/reference/expressions.rst
@@ -318,7 +318,7 @@ Yield expressions
.. productionlist::
yield_atom: "(" `yield_expression` ")"
- yield_expression: "yield" [`expression_list`]
+ yield_expression: "yield" [`expression_list` | "from" `expression`]
The :keyword:`yield` expression is only used when defining a generator function,
and can only be used in the body of a function definition. Using a
@@ -336,7 +336,10 @@ the internal evaluation stack. When the execution is resumed by calling one of
the generator's methods, the function can proceed exactly as if the
:keyword:`yield` expression was just another external call. The value of the
:keyword:`yield` expression after resuming depends on the method which resumed
-the execution.
+the execution. If :meth:`__next__` is used (typically via either a
+:keyword:`for` or the :func:`next` builtin) then the result is :const:`None`.
+Otherwise, if :meth:`send` is used, then the result will be the value passed
+in to that method.
.. index:: single: coroutine
@@ -346,12 +349,32 @@ suspended. The only difference is that a generator function cannot control
where should the execution continue after it yields; the control is always
transferred to the generator's caller.
-The :keyword:`yield` statement is allowed in the :keyword:`try` clause of a
+:keyword:`yield` expressions are allowed in the :keyword:`try` clause of a
:keyword:`try` ... :keyword:`finally` construct. If the generator is not
resumed before it is finalized (by reaching a zero reference count or by being
garbage collected), the generator-iterator's :meth:`close` method will be
called, allowing any pending :keyword:`finally` clauses to execute.
+When ``yield from <expr>`` is used, it treats the supplied expression as
+a subiterator. All values produced by that subiterator are passed directly
+to the caller of the current generator's methods. Any values passed in with
+:meth:`send` and any exceptions passed in with :meth:`throw` are passed to
+the underlying iterator if it has the appropriate methods. If this is not the
+case, then :meth:`send` will raise :exc:`AttributeError` or :exc:`TypeError`,
+while :meth:`throw` will just raise the passed in exception immediately.
+
+When the underlying iterator is complete, the :attr:`~StopIteration.value`
+attribute of the raised :exc:`StopIteration` instance becomes the value of
+the yield expression. It can be either set explicitly when raising
+:exc:`StopIteration`, or automatically when the sub-iterator is a generator
+(by returning a value from the sub-generator).
+
+ .. versionchanged:: 3.3
+ Added ``yield from <expr>`` to delegate control flow to a subiterator
+
+The parentheses can be omitted when the :keyword:`yield` expression is the
+sole expression on the right hand side of an assignment statement.
+
.. index:: object: generator
@@ -452,6 +475,10 @@ generator functions::
The proposal to enhance the API and syntax of generators, making them
usable as simple coroutines.
+ :pep:`0380` - Syntax for Delegating to a Subgenerator
+ The proposal to introduce the :token:`yield_from` syntax, making delegation
+ to sub-generators easy.
+
.. _primaries:
diff --git a/Doc/reference/import.rst b/Doc/reference/import.rst
new file mode 100644
index 0000000..4688e78
--- /dev/null
+++ b/Doc/reference/import.rst
@@ -0,0 +1,697 @@
+
+.. _importsystem:
+
+*****************
+The import system
+*****************
+
+.. index:: single: import machinery
+
+Python code in one :term:`module` gains access to the code in another module
+by the process of :term:`importing` it. The :keyword:`import` statement is
+the most common way of invoking the import machinery, but it is not the only
+way. Functions such as :func:`importlib.import_module` and built-in
+:func:`__import__` can also be used to invoke the import machinery.
+
+The :keyword:`import` statement combines two operations; it searches for the
+named module, then it binds the results of that search to a name in the local
+scope. The search operation of the :keyword:`import` statement is defined as
+a call to the :func:`__import__` function, with the appropriate arguments.
+The return value of :func:`__import__` is used to perform the name
+binding operation of the :keyword:`import` statement. See the
+:keyword:`import` statement for the exact details of that name binding
+operation.
+
+A direct call to :func:`__import__` performs only the module search and, if
+found, the module creation operation. While certain side-effects may occur,
+such as the importing of parent packages, and the updating of various caches
+(including :data:`sys.modules`), only the :keyword:`import` statement performs
+a name binding operation.
+
+When calling :func:`__import__` as part of an import statement, the
+import system first checks the module global namespace for a function by
+that name. If it is not found, then the standard builtin :func:`__import__`
+is called. Other mechanisms for invoking the import system (such as
+:func:`importlib.import_module`) do not perform this check and will always
+use the standard import system.
+
+When a module is first imported, Python searches for the module and if found,
+it creates a module object [#fnmo]_, initializing it. If the named module
+cannot be found, an :exc:`ImportError` is raised. Python implements various
+strategies to search for the named module when the import machinery is
+invoked. These strategies can be modified and extended by using various hooks
+described in the sections below.
+
+.. versionchanged:: 3.3
+ The import system has been updated to fully implement the second phase
+ of PEP 302. There is no longer any implicit import machinery - the full
+ import system is exposed through :data:`sys.meta_path`. In addition,
+ native namespace package support has been implemented (see PEP 420).
+
+
+:mod:`importlib`
+================
+
+The :mod:`importlib` module provides a rich API for interacting with the
+import system. For example :func:`importlib.import_module` provides a
+recommended, simpler API than built-in :func:`__import__` for invoking the
+import machinery. Refer to the :mod:`importlib` library documentation for
+additional detail.
+
+
+
+Packages
+========
+
+.. index::
+ single: package
+
+Python has only one type of module object, and all modules are of this type,
+regardless of whether the module is implemented in Python, C, or something
+else. To help organize modules and provide a naming hierarchy, Python has a
+concept of :term:`packages <package>`.
+
+You can think of packages as the directories on a file system and modules as
+files within directories, but don't take this analogy too literally since
+packages and modules need not originate from the file system. For the
+purposes of this documentation, we'll use this convenient analogy of
+directories and files. Like file system directories, packages are organized
+hierarchically, and packages may themselves contain subpackages, as well as
+regular modules.
+
+It's important to keep in mind that all packages are modules, but not all
+modules are packages. Or put another way, packages are just a special kind of
+module. Specifically, any module that contains a ``__path__`` attribute is
+considered a package.
+
+All modules have a name. Subpackage names are separated from their parent
+package name by dots, akin to Python's standard attribute access syntax. Thus
+you might have a module called :mod:`sys` and a package called :mod:`email`,
+which in turn has a subpackage called :mod:`email.mime` and a module within
+that subpackage called :mod:`email.mime.text`.
+
+
+Regular packages
+----------------
+
+.. index::
+ pair: package; regular
+
+Python defines two types of packages, :term:`regular packages <regular
+package>` and :term:`namespace packages <namespace package>`. Regular
+packages are traditional packages as they existed in Python 3.2 and earlier.
+A regular package is typically implemented as a directory containing an
+``__init__.py`` file. When a regular package is imported, this
+``__init__.py`` file is implicitly executed, and the objects it defines are
+bound to names in the package's namespace. The ``__init__.py`` file can
+contain the same Python code that any other module can contain, and Python
+will add some additional attributes to the module when it is imported.
+
+For example, the following file system layout defines a top level ``parent``
+package with three subpackages::
+
+ parent/
+ __init__.py
+ one/
+ __init__.py
+ two/
+ __init__.py
+ three/
+ __init__.py
+
+Importing ``parent.one`` will implicitly execute ``parent/__init__.py`` and
+``parent/one/__init__.py``. Subsequent imports of ``parent.two`` or
+``parent.three`` will execute ``parent/two/__init__.py`` and
+``parent/three/__init__.py`` respectively.
+
+
+Namespace packages
+------------------
+
+.. index::
+ pair: package; namespace
+ pair: package; portion
+
+A namespace package is a composite of various :term:`portions <portion>`,
+where each portion contributes a subpackage to the parent package. Portions
+may reside in different locations on the file system. Portions may also be
+found in zip files, on the network, or anywhere else that Python searches
+during import. Namespace packages may or may not correspond directly to
+objects on the file system; they may be virtual modules that have no concrete
+representation.
+
+Namespace packages do not use an ordinary list for their ``__path__``
+attribute. They instead use a custom iterable type which will automatically
+perform a new search for package portions on the next import attempt within
+that package if the path of their parent package (or :data:`sys.path` for a
+top level package) changes.
+
+With namespace packages, there is no ``parent/__init__.py`` file. In fact,
+there may be multiple ``parent`` directories found during import search, where
+each one is provided by a different portion. Thus ``parent/one`` may not be
+physically located next to ``parent/two``. In this case, Python will create a
+namespace package for the top-level ``parent`` package whenever it or one of
+its subpackages is imported.
+
+See also :pep:`420` for the namespace package specification.
+
+
+Searching
+=========
+
+To begin the search, Python needs the :term:`fully qualified <qualified name>`
+name of the module (or package, but for the purposes of this discussion, the
+difference is immaterial) being imported. This name may come from various
+arguments to the :keyword:`import` statement, or from the parameters to the
+:func:`importlib.import_module` or :func:`__import__` functions.
+
+This name will be used in various phases of the import search, and it may be
+the dotted path to a submodule, e.g. ``foo.bar.baz``. In this case, Python
+first tries to import ``foo``, then ``foo.bar``, and finally ``foo.bar.baz``.
+If any of the intermediate imports fail, an :exc:`ImportError` is raised.
+
+
+The module cache
+----------------
+
+.. index::
+ single: sys.modules
+
+The first place checked during import search is :data:`sys.modules`. This
+mapping serves as a cache of all modules that have been previously imported,
+including the intermediate paths. So if ``foo.bar.baz`` was previously
+imported, :data:`sys.modules` will contain entries for ``foo``, ``foo.bar``,
+and ``foo.bar.baz``. Each key will have as its value the corresponding module
+object.
+
+During import, the module name is looked up in :data:`sys.modules` and if
+present, the associated value is the module satisfying the import, and the
+process completes. However, if the value is ``None``, then an
+:exc:`ImportError` is raised. If the module name is missing, Python will
+continue searching for the module.
+
+:data:`sys.modules` is writable. Deleting a key may not destroy the
+associated module (as other modules may hold references to it),
+but it will invalidate the cache entry for the named module, causing
+Python to search anew for the named module upon its next
+import. The key can also be assigned to ``None``, forcing the next import
+of the module to result in an :exc:`ImportError`.
+
+Beware though, as if you keep a reference to the module object,
+invalidate its cache entry in :data:`sys.modules`, and then re-import the
+named module, the two module objects will *not* be the same. By contrast,
+:func:`imp.reload` will reuse the *same* module object, and simply
+reinitialise the module contents by rerunning the module's code.
+
+
+Finders and loaders
+-------------------
+
+.. index::
+ single: finder
+ single: loader
+
+If the named module is not found in :data:`sys.modules`, then Python's import
+protocol is invoked to find and load the module. This protocol consists of
+two conceptual objects, :term:`finders <finder>` and :term:`loaders <loader>`.
+A finder's job is to determine whether it can find the named module using
+whatever strategy it knows about. Objects that implement both of these
+interfaces are referred to as :term:`importers <importer>` - they return
+themselves when they find that they can load the requested module.
+
+Python includes a number of default finders and importers. One
+knows how to locate frozen modules, and another knows how to locate
+built-in modules. A third default finder searches an :term:`import path`
+for modules. The :term:`import path` is a list of locations that may
+name file system paths or zip files. It can also be extended to search
+for any locatable resource, such as those identified by URLs.
+
+The import machinery is extensible, so new finders can be added to extend the
+range and scope of module searching.
+
+Finders do not actually load modules. If they can find the named module, they
+return a :term:`loader`, which the import machinery then invokes to load the
+module and create the corresponding module object.
+
+The following sections describe the protocol for finders and loaders in more
+detail, including how you can create and register new ones to extend the
+import machinery.
+
+
+Import hooks
+------------
+
+.. index::
+ single: import hooks
+ single: meta hooks
+ single: path hooks
+ pair: hooks; import
+ pair: hooks; meta
+ pair: hooks; path
+
+The import machinery is designed to be extensible; the primary mechanisms for
+this are the *import hooks*. There are two types of import hooks: *meta
+hooks* and *import path hooks*.
+
+Meta hooks are called at the start of import processing, before any other
+import processing has occurred, other than :data:`sys.modules` cache look up.
+This allows meta hooks to override :data:`sys.path` processing, frozen
+modules, or even built-in modules. Meta hooks are registered by adding new
+finder objects to :data:`sys.meta_path`, as described below.
+
+Import path hooks are called as part of :data:`sys.path` (or
+``package.__path__``) processing, at the point where their associated path
+item is encountered. Import path hooks are registered by adding new callables
+to :data:`sys.path_hooks` as described below.
+
+
+The meta path
+-------------
+
+.. index::
+ single: sys.meta_path
+ pair: finder; find_module
+ pair: finder; find_loader
+
+When the named module is not found in :data:`sys.modules`, Python next
+searches :data:`sys.meta_path`, which contains a list of meta path finder
+objects. These finders are queried in order to see if they know how to handle
+the named module. Meta path finders must implement a method called
+:meth:`find_module()` which takes two arguments, a name and an import path.
+The meta path finder can use any strategy it wants to determine whether it can
+handle the named module or not.
+
+If the meta path finder knows how to handle the named module, it returns a
+loader object. If it cannot handle the named module, it returns ``None``. If
+:data:`sys.meta_path` processing reaches the end of its list without returning
+a loader, then an :exc:`ImportError` is raised. Any other exceptions raised
+are simply propagated up, aborting the import process.
+
+The :meth:`find_module()` method of meta path finders is called with two
+arguments. The first is the fully qualified name of the module being
+imported, for example ``foo.bar.baz``. The second argument is the path
+entries to use for the module search. For top-level modules, the second
+argument is ``None``, but for submodules or subpackages, the second
+argument is the value of the parent package's ``__path__`` attribute. If
+the appropriate ``__path__`` attribute cannot be accessed, an
+:exc:`ImportError` is raised.
+
+The meta path may be traversed multiple times for a single import request.
+For example, assuming none of the modules involved has already been cached,
+importing ``foo.bar.baz`` will first perform a top level import, calling
+``mpf.find_module("foo", None)`` on each meta path finder (``mpf``). After
+``foo`` has been imported, ``foo.bar`` will be imported by traversing the
+meta path a second time, calling
+``mpf.find_module("foo.bar", foo.__path__)``. Once ``foo.bar`` has been
+imported, the final traversal will call
+``mpf.find_module("foo.bar.baz", foo.bar.__path__)``.
+
+Some meta path finders only support top level imports. These importers will
+always return ``None`` when anything other than ``None`` is passed as the
+second argument.
+
+Python's default :data:`sys.meta_path` has three meta path finders, one that
+knows how to import built-in modules, one that knows how to import frozen
+modules, and one that knows how to import modules from an :term:`import path`
+(i.e. the :term:`path based finder`).
+
+
+Loaders
+=======
+
+If and when a module loader is found, its
+:meth:`~importlib.abc.Loader.load_module` method is called, with a single
+argument, the fully qualified name of the module being imported. This method
+has several responsibilities, and should return the module object it has
+loaded [#fnlo]_. If it cannot load the module, it should raise an
+:exc:`ImportError`, although any other exception raised during
+:meth:`load_module()` will be propagated.
+
+In many cases, the finder and loader can be the same object; in such cases the
+:meth:`finder.find_module()` would just return ``self``.
+
+Loaders must satisfy the following requirements:
+
+ * If there is an existing module object with the given name in
+ :data:`sys.modules`, the loader must use that existing module. (Otherwise,
+ :func:`imp.reload` will not work correctly.) If the named module does
+ not exist in :data:`sys.modules`, the loader must create a new module
+ object and add it to :data:`sys.modules`.
+
+ Note that the module *must* exist in :data:`sys.modules` before the loader
+ executes the module code. This is crucial because the module code may
+ (directly or indirectly) import itself; adding it to :data:`sys.modules`
+ beforehand prevents unbounded recursion in the worst case and multiple
+ loading in the best.
+
+ If loading fails, the loader must remove any modules it has inserted into
+ :data:`sys.modules`, but it must remove **only** the failing module, and
+ only if the loader itself has loaded it explicitly. Any module already in
+ the :data:`sys.modules` cache, and any module that was successfully loaded
+ as a side-effect, must remain in the cache.
+
+ * The loader may set the ``__file__`` attribute of the module. If set, this
+ attribute's value must be a string. The loader may opt to leave
+ ``__file__`` unset if it has no semantic meaning (e.g. a module loaded from
+ a database).
+
+ * The loader may set the ``__name__`` attribute of the module. While not
+ required, setting this attribute is highly recommended so that the
+ :meth:`repr()` of the module is more informative.
+
+ * If the module is a package (either regular or namespace), the loader must
+ set the module object's ``__path__`` attribute. The value must be
+ iterable, but may be empty if ``__path__`` has no further significance
+ to the loader. If ``__path__`` is not empty, it must produce strings
+ when iterated over. More details on the semantics of ``__path__`` are
+ given :ref:`below <package-path-rules>`.
+
+ * The ``__loader__`` attribute must be set to the loader object that loaded
+ the module. This is mostly for introspection and reloading, but can be
+ used for additional loader-specific functionality, for example getting
+ data associated with a loader.
+
+ * The module's ``__package__`` attribute should be set. Its value must be a
+ string, but it can be the same value as its ``__name__``. If the attribute
+ is set to ``None`` or is missing, the import system will fill it in with a
+ more appropriate value. When the module is a package, its ``__package__``
+ value should be set to its ``__name__``. When the module is not a package,
+ ``__package__`` should be set to the empty string for top-level modules, or
+ for submodules, to the parent package's name. See :pep:`366` for further
+ details.
+
+ This attribute is used instead of ``__name__`` to calculate explicit
+ relative imports for main modules, as defined in :pep:`366`.
+
+ * If the module is a Python module (as opposed to a built-in module or a
+ dynamically loaded extension), the loader should execute the module's code
+ in the module's global name space (``module.__dict__``).
+
+
+Module reprs
+------------
+
+By default, all modules have a usable repr; however, depending on the
+attributes set above, and hooks in the loader, you can more explicitly control
+the repr of module objects.
+
+Loaders may implement a :meth:`module_repr()` method which takes a single
+argument, the module object. When ``repr(module)`` is called for a module
+with a loader supporting this protocol, whatever is returned from
+``module.__loader__.module_repr(module)`` is returned as the module's repr
+without further processing. This return value must be a string.
+
+If the module has no ``__loader__`` attribute, or the loader has no
+:meth:`module_repr()` method, then the module object implementation itself
+will craft a default repr using whatever information is available. It will
+try to use the ``module.__name__``, ``module.__file__``, and
+``module.__loader__`` as input into the repr, with defaults for whatever
+information is missing.
+
+Here are the exact rules used:
+
+ * If the module has a ``__loader__`` and that loader has a
+ :meth:`module_repr()` method, call it with a single argument, which is the
+ module object. The value returned is used as the module's repr.
+
+ * If an exception occurs in :meth:`module_repr()`, the exception is caught
+ and discarded, and the calculation of the module's repr continues as if
+ :meth:`module_repr()` did not exist.
+
+ * If the module has a ``__file__`` attribute, this is used as part of the
+ module's repr.
+
+ * If the module has no ``__file__`` but does have a ``__loader__``, then the
+ loader's repr is used as part of the module's repr.
+
+ * Otherwise, just use the module's ``__name__`` in the repr.
+
+This example, from :pep:`420`, shows how a loader can craft its own module
+repr::
+
+ class NamespaceLoader:
+ @classmethod
+ def module_repr(cls, module):
+ return "<module '{}' (namespace)>".format(module.__name__)
+
+
+.. _package-path-rules:
+
+module.__path__
+---------------
+
+By definition, if a module has a ``__path__`` attribute, it is a package,
+regardless of its value.
+
+A package's ``__path__`` attribute is used during imports of its subpackages.
+Within the import machinery, it functions much the same as :data:`sys.path`,
+i.e. providing a list of locations to search for modules during import.
+However, ``__path__`` is typically much more constrained than
+:data:`sys.path`.
+
+``__path__`` must be an iterable of strings, but it may be empty.
+The same rules used for :data:`sys.path` also apply to a package's
+``__path__``, and :data:`sys.path_hooks` (described below) are
+consulted when traversing a package's ``__path__``.
+
+A package's ``__init__.py`` file may set or alter the package's ``__path__``
+attribute, and this was typically the way namespace packages were implemented
+prior to :pep:`420`. With the adoption of :pep:`420`, namespace packages no
+longer need to supply ``__init__.py`` files containing only ``__path__``
+manipulation code; the namespace loader automatically sets ``__path__``
+correctly for the namespace package.
+
+
+The Path Based Finder
+=====================
+
+.. index::
+ single: path based finder
+
+As mentioned previously, Python comes with several default meta path finders.
+One of these, called the :term:`path based finder`, searches an :term:`import
+path`, which contains a list of :term:`path entries <path entry>`. Each path
+entry names a location to search for modules.
+
+The path based finder itself doesn't know how to import anything. Instead, it
+traverses the individual path entries, associating each of them with a
+path entry finder that knows how to handle that particular kind of path.
+
+The default set of path entry finders implement all the semantics for finding
+modules on the file system, handling special file types such as Python source
+code (``.py`` files), Python byte code (``.pyc`` and ``.pyo`` files) and
+shared libraries (e.g. ``.so`` files). When supported by the :mod:`zipimport`
+module in the standard library, the default path entry finders also handle
+loading all of these file types (other than shared libraries) from zipfiles.
+
+Path entries need not be limited to file system locations. They can refer to
+URLs, database queries, or any other location that can be specified as a
+string.
+
+The path based finder provides additional hooks and protocols so that you
+can extend and customize the types of searchable path entries. For example,
+if you wanted to support path entries as network URLs, you could write a hook
+that implements HTTP semantics to find modules on the web. This hook (a
+callable) would return a :term:`path entry finder` supporting the protocol
+described below, which would then be used to get a loader for the module from the
+web.
+
+A word of warning: this section and the previous both use the term *finder*,
+distinguishing between them by using the terms :term:`meta path finder` and
+:term:`path entry finder`. These two types of finders are very similar,
+support similar protocols, and function in similar ways during the import
+process, but it's important to keep in mind that they are subtly different.
+In particular, meta path finders operate at the beginning of the import
+process, as keyed off the :data:`sys.meta_path` traversal.
+
+By contrast, path entry finders are in a sense an implementation detail
+of the path based finder, and in fact, if the path based finder were to be
+removed from :data:`sys.meta_path`, none of the path entry finder semantics
+would be invoked.
+
+
+Path entry finders
+------------------
+
+.. index::
+ single: sys.path
+ single: sys.path_hooks
+ single: sys.path_importer_cache
+ single: PYTHONPATH
+
+The :term:`path based finder` is responsible for finding and loading Python
+modules and packages whose location is specified with a string :term:`path
+entry`. Most path entries name locations in the file system, but they need
+not be limited to this.
+
+As a meta path finder, the :term:`path based finder` implements the
+:meth:`find_module()` protocol previously described; however, it exposes
+additional hooks that can be used to customize how modules are found and
+loaded from the :term:`import path`.
+
+Three variables are used by the :term:`path based finder`, :data:`sys.path`,
+:data:`sys.path_hooks` and :data:`sys.path_importer_cache`. The ``__path__``
+attributes on package objects are also used. These provide additional ways
+that the import machinery can be customized.
+
+:data:`sys.path` contains a list of strings providing search locations for
+modules and packages. It is initialized from the :envvar:`PYTHONPATH`
+environment variable and various other installation- and
+implementation-specific defaults. Entries in :data:`sys.path` can name
+directories on the file system, zip files, and potentially other "locations"
+(see the :mod:`site` module) that should be searched for modules, such as
+URLs, or database queries.
+
+The :term:`path based finder` is a :term:`meta path finder`, so the import
+machinery begins the :term:`import path` search by calling the path
+based finder's :meth:`find_module()` method as described previously. When
+the ``path`` argument to :meth:`find_module()` is given, it will be a
+list of string paths to traverse - typically a package's ``__path__``
+attribute for an import within that package. If the ``path`` argument
+is ``None``, this indicates a top level import and :data:`sys.path` is used.
+
+The path based finder iterates over every entry in the search path, and
+for each of these, looks for an appropriate :term:`path entry finder` for the
+path entry. Because this can be an expensive operation (e.g. there may be
+``stat()`` call overheads for this search), the path based finder maintains
+a cache mapping path entries to path entry finders. This cache is maintained
+in :data:`sys.path_importer_cache` (despite the name, this cache actually
+stores finder objects rather than being limited to :term:`importer` objects).
+In this way, the expensive search for a particular :term:`path entry`
+location's :term:`path entry finder` need only be done once. User code is
+free to remove cache entries from :data:`sys.path_importer_cache` forcing
+the path based finder to perform the path entry search again [#fnpic]_.
+
+If the path entry is not present in the cache, the path based finder iterates
+over every callable in :data:`sys.path_hooks`. Each of the
+:term:`path entry hooks <path entry hook>` in this list is called with a
+single argument, the path entry to be searched. This callable may either
+return a :term:`path entry finder` that can handle the path entry, or it may
+raise :exc:`ImportError`.
+An :exc:`ImportError` is used by the path based finder to signal that the hook
+cannot find a :term:`path entry finder` for that :term:`path entry`. The
+exception is ignored and :term:`import path` iteration continues.
+
+If :data:`sys.path_hooks` iteration ends with no :term:`path entry finder`
+being returned, then the path based finder's :meth:`find_module()` method
+will store ``None`` in :data:`sys.path_importer_cache` (to indicate that
+there is no finder for this path entry) and return ``None``, indicating that
+this :term:`meta path finder` could not find the module.
+
+If a :term:`path entry finder` *is* returned by one of the :term:`path entry
+hook` callables on :data:`sys.path_hooks`, then the following protocol is used
+to ask the finder for a module loader, which is then used to load the module.
+
+
+Path entry finder protocol
+--------------------------
+
+In order to support imports of modules and initialized packages and also to
+contribute portions to namespace packages, path entry finders must implement
+the :meth:`find_loader()` method.
+
+:meth:`find_loader()` takes one argument, the fully qualified name of the
+module being imported. :meth:`find_loader()` returns a 2-tuple where the
+first item is the loader and the second item is a namespace :term:`portion`.
+When the first item (i.e. the loader) is ``None``, this means that while the
+path entry finder does not have a loader for the named module, it knows that the
+path entry contributes to a namespace portion for the named module. This will
+almost always be the case where Python is asked to import a namespace package
+that has no physical presence on the file system. When a path entry finder
+returns ``None`` for the loader, the second item of the 2-tuple return value
+must be a sequence, although it can be empty.
+
+If :meth:`find_loader()` returns a non-``None`` loader value, the portion is
+ignored and the loader is returned from the path based finder, terminating
+the search through the path entries.
+
+For backwards compatibility with other implementations of the import
+protocol, many path entry finders also support the same,
+traditional :meth:`find_module()` method that meta path finders support.
+However, path entry finder :meth:`find_module()` methods are never called
+with a ``path`` argument (they are expected to record the appropriate
+path information from the initial call to the path hook).
+
+The :meth:`find_module()` method on path entry finders is deprecated,
+as it does not allow the path entry finder to contribute portions to
+namespace packages. Instead path entry finders should implement the
+:meth:`find_loader()` method as described above. If it exists on the path
+entry finder, the import system will always call :meth:`find_loader()`
+in preference to :meth:`find_module()`.
+
+
+Replacing the standard import system
+====================================
+
+The most reliable mechanism for replacing the entire import system is to
+delete the default contents of :data:`sys.meta_path`, replacing them
+entirely with a custom meta path hook.
+
+If it is acceptable to only alter the behaviour of import statements
+without affecting other APIs that access the import system, then replacing
+the builtin :func:`__import__` function may be sufficient. This technique
+may also be employed at the module level to only alter the behaviour of
+import statements within that module.
+
+To selectively prevent import of some modules from a hook early on the
+meta path (rather than disabling the standard import system entirely),
+it is sufficient to raise :exc:`ImportError` directly from
+:meth:`find_module` instead of returning ``None``. The latter indicates
+that the meta path search should continue, while raising an exception
+terminates it immediately.
+
+
+Open issues
+===========
+
+XXX It would be really nice to have a diagram.
+
+XXX * (import_machinery.rst) how about a section devoted just to the
+attributes of modules and packages, perhaps expanding upon or supplanting the
+related entries in the data model reference page?
+
+XXX runpy, pkgutil, et al in the library manual should all get "See Also"
+links at the top pointing to the new import system section.
+
+
+References
+==========
+
+The import machinery has evolved considerably since Python's early days. The
+original `specification for packages
+<http://www.python.org/doc/essays/packages.html>`_ is still available to read,
+although some details have changed since the writing of that document.
+
+The original specification for :data:`sys.meta_path` was :pep:`302`, with
+subsequent extension in :pep:`420`.
+
+:pep:`420` introduced :term:`namespace packages <namespace package>` for
+Python 3.3. :pep:`420` also introduced the :meth:`find_loader` protocol as an
+alternative to :meth:`find_module`.
+
+:pep:`366` describes the addition of the ``__package__`` attribute for
+explicit relative imports in main modules.
+
+:pep:`328` introduced absolute and explicit relative imports and initially
+proposed ``__name__`` for semantics :pep:`366` would eventually specify for
+``__package__``.
+
+:pep:`338` defines executing modules as scripts.
+
+
+Footnotes
+=========
+
+.. [#fnmo] See :class:`types.ModuleType`.
+
+.. [#fnlo] The importlib implementation avoids using the return value
+ directly. Instead, it gets the module object by looking the module name up
+ in :data:`sys.modules`. The indirect effect of this is that an imported
+ module may replace itself in :data:`sys.modules`. This is
+ implementation-specific behavior that is not guaranteed to work in other
+ Python implementations.
+
+.. [#fnpic] In legacy code, it is possible to find instances of
+ :class:`imp.NullImporter` in the :data:`sys.path_importer_cache`. It
+ is recommended that code be changed to use ``None`` instead. See
+ :ref:`portingpythoncode` for more details.
diff --git a/Doc/reference/index.rst b/Doc/reference/index.rst
index bd1a281..55f93d7 100644
--- a/Doc/reference/index.rst
+++ b/Doc/reference/index.rst
@@ -24,6 +24,7 @@ interfaces available to C/C++ programmers in detail.
lexical_analysis.rst
datamodel.rst
executionmodel.rst
+ import.rst
expressions.rst
simple_stmts.rst
compound_stmts.rst
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
index 4b49738..94f219b 100644
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -401,7 +401,7 @@ String literals are described by the following lexical definitions:
.. productionlist::
stringliteral: [`stringprefix`](`shortstring` | `longstring`)
- stringprefix: "r" | "R"
+ stringprefix: "r" | "u" | "R" | "U"
shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"'
longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""'
shortstringitem: `shortstringchar` | `stringescapeseq`
@@ -412,7 +412,7 @@ String literals are described by the following lexical definitions:
.. productionlist::
bytesliteral: `bytesprefix`(`shortbytes` | `longbytes`)
- bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR"
+ bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB"
shortbytes: "'" `shortbytesitem`* "'" | '"' `shortbytesitem`* '"'
longbytes: "'''" `longbytesitem`* "'''" | '"""' `longbytesitem`* '"""'
shortbytesitem: `shortbyteschar` | `bytesescapeseq`
@@ -441,10 +441,24 @@ instance of the :class:`bytes` type instead of the :class:`str` type. They
may only contain ASCII characters; bytes with a numeric value of 128 or greater
must be expressed with escapes.
+As of Python 3.3 it is possible again to prefix unicode strings with a
+``u`` prefix to simplify maintenance of dual 2.x and 3.x codebases.
+
Both string and bytes literals may optionally be prefixed with a letter ``'r'``
or ``'R'``; such strings are called :dfn:`raw strings` and treat backslashes as
literal characters. As a result, in string literals, ``'\U'`` and ``'\u'``
-escapes in raw strings are not treated specially.
+escapes in raw strings are not treated specially. Given that Python 2.x's raw
+unicode literals behave differently than Python 3.x's, the ``'ur'`` syntax
+is not supported.
+
+ .. versionadded:: 3.3
+ The ``'rb'`` prefix of raw bytes literals has been added as a synonym
+ of ``'br'``.
+
+ .. versionadded:: 3.3
+ Support for the unicode legacy literal (``u'value'``) was reintroduced
+ to simplify the maintenance of dual Python 2.x and 3.x codebases.
+ See :pep:`414` for more information.
In triple-quoted strings, unescaped newlines and quotes are allowed (and are
retained), except that three unescaped quotes in a row terminate the string. (A
@@ -492,13 +506,13 @@ Escape sequences only recognized in string literals are:
+-----------------+---------------------------------+-------+
| Escape Sequence | Meaning | Notes |
+=================+=================================+=======+
-| ``\N{name}`` | Character named *name* in the | |
+| ``\N{name}`` | Character named *name* in the | \(4) |
| | Unicode database | |
+-----------------+---------------------------------+-------+
-| ``\uxxxx`` | Character with 16-bit hex value | \(4) |
+| ``\uxxxx`` | Character with 16-bit hex value | \(5) |
| | *xxxx* | |
+-----------------+---------------------------------+-------+
-| ``\Uxxxxxxxx`` | Character with 32-bit hex value | \(5) |
+| ``\Uxxxxxxxx`` | Character with 32-bit hex value | \(6) |
| | *xxxxxxxx* | |
+-----------------+---------------------------------+-------+
@@ -516,13 +530,15 @@ Notes:
with the given value.
(4)
+ .. versionchanged:: 3.3
+ Support for name aliases [#]_ has been added.
+
+(5)
Individual code units which form parts of a surrogate pair can be encoded using
this escape sequence. Exactly four hex digits are required.
-(5)
- Any Unicode character can be encoded this way, but characters outside the Basic
- Multilingual Plane (BMP) will be encoded using a surrogate pair if Python is
- compiled to use 16-bit code units (the default). Exactly eight hex digits
+(6)
+ Any Unicode character can be encoded this way. Exactly eight hex digits
are required.
@@ -706,3 +722,8 @@ The following printing ASCII characters are not used in Python. Their
occurrence outside string literals and comments is an unconditional error::
$ ? `
+
+
+.. rubric:: Footnotes
+
+.. [#] http://www.unicode.org/Public/6.1.0/ucd/NameAliases.txt
diff --git a/Doc/reference/simple_stmts.rst b/Doc/reference/simple_stmts.rst
index 73183d5..81dc748 100644
--- a/Doc/reference/simple_stmts.rst
+++ b/Doc/reference/simple_stmts.rst
@@ -424,10 +424,10 @@ When :keyword:`return` passes control out of a :keyword:`try` statement with a
:keyword:`finally` clause, that :keyword:`finally` clause is executed before
really leaving the function.
-In a generator function, the :keyword:`return` statement is not allowed to
-include an :token:`expression_list`. In that context, a bare :keyword:`return`
-indicates that the generator is done and will cause :exc:`StopIteration` to be
-raised.
+In a generator function, the :keyword:`return` statement indicates that the
+generator is done and will cause :exc:`StopIteration` to be raised. The returned
+value (if any) is used as an argument to construct :exc:`StopIteration` and
+becomes the :attr:`StopIteration.value` attribute.
.. _yield:
@@ -449,6 +449,7 @@ The :keyword:`yield` statement is only used when defining a generator function,
and is only used in the body of the generator function. Using a :keyword:`yield`
statement in a function definition is sufficient to cause that definition to
create a generator function instead of a normal function.
+
When a generator function is called, it returns an iterator known as a generator
iterator, or more commonly, a generator. The body of the generator function is
executed by calling the :func:`next` function on the generator repeatedly until
@@ -468,14 +469,28 @@ resumed before it is finalized (by reaching a zero reference count or by being
garbage collected), the generator-iterator's :meth:`close` method will be
called, allowing any pending :keyword:`finally` clauses to execute.
+When ``yield from <expr>`` is used, it treats the supplied expression as
+a subiterator, producing values from it until the underlying iterator is
+exhausted.
+
+ .. versionchanged:: 3.3
+ Added ``yield from <expr>`` to delegate control flow to a subiterator.
+
+For full details of :keyword:`yield` semantics, refer to the :ref:`yieldexpr`
+section.
+
.. seealso::
:pep:`0255` - Simple Generators
The proposal for adding generators and the :keyword:`yield` statement to Python.
:pep:`0342` - Coroutines via Enhanced Generators
- The proposal that, among other generator enhancements, proposed allowing
- :keyword:`yield` to appear inside a :keyword:`try` ... :keyword:`finally` block.
+ The proposal to enhance the API and syntax of generators, making them
+ usable as simple coroutines.
+
+ :pep:`0380` - Syntax for Delegating to a Subgenerator
+ The proposal to introduce the :token:`yield_from` syntax, making delegation
+ to sub-generators easy.
.. _raise:
@@ -645,161 +660,98 @@ The :keyword:`import` statement
relative_module: "."* `module` | "."+
name: `identifier`
-Import statements are executed in two steps: (1) find a module, and initialize
-it if necessary; (2) define a name or names in the local namespace (of the scope
-where the :keyword:`import` statement occurs). The statement comes in two
-forms differing on whether it uses the :keyword:`from` keyword. The first form
-(without :keyword:`from`) repeats these steps for each identifier in the list.
-The form with :keyword:`from` performs step (1) once, and then performs step
-(2) repeatedly. For a reference implementation of step (1), see the
-:mod:`importlib` module.
+The basic import statement (no :keyword:`from` clause) is executed in two
+steps:
-.. index::
- single: package
+#. find a module, loading and initializing it if necessary
+#. define a name or names in the local namespace for the scope where
+ the :keyword:`import` statement occurs.
-To understand how step (1) occurs, one must first understand how Python handles
-hierarchical naming of modules. To help organize modules and provide a
-hierarchy in naming, Python has a concept of packages. A package can contain
-other packages and modules while modules cannot contain other modules or
-packages. From a file system perspective, packages are directories and modules
-are files. The original `specification for packages
-<http://www.python.org/doc/essays/packages.html>`_ is still available to read,
-although minor details have changed since the writing of that document.
+When the statement contains multiple clauses (separated by
+commas) the two steps are carried out separately for each clause, just
+as though the clauses had been separated out into individual import
+statements.
-.. index::
- single: sys.modules
+The first step, finding and loading modules, is described in
+greater detail in the section on the :ref:`import system <importsystem>`,
+which also describes the various types of packages and modules that can
+be imported, as well as all the hooks that can be used to customize
+the import system. Note that failures in this step may indicate either
+that the module could not be located, *or* that an error occurred while
+initializing the module, which includes execution of the module's code.
-Once the name of the module is known (unless otherwise specified, the term
-"module" will refer to both packages and modules), searching
-for the module or package can begin. The first place checked is
-:data:`sys.modules`, the cache of all modules that have been imported
-previously. If the module is found there then it is used in step (2) of import
-unless ``None`` is found in :data:`sys.modules`, in which case
-:exc:`ImportError` is raised.
+If the requested module is retrieved successfully, it will be made
+available in the local namespace in one of three ways:
-.. index::
- single: sys.meta_path
- single: finder
- pair: finder; find_module
- single: __path__
-
-If the module is not found in the cache, then :data:`sys.meta_path` is searched
-(the specification for :data:`sys.meta_path` can be found in :pep:`302`).
-The object is a list of :term:`finder` objects which are queried in order as to
-whether they know how to load the module by calling their :meth:`find_module`
-method with the name of the module. If the module happens to be contained
-within a package (as denoted by the existence of a dot in the name), then a
-second argument to :meth:`find_module` is given as the value of the
-:attr:`__path__` attribute from the parent package (everything up to the last
-dot in the name of the module being imported). If a finder can find the module
-it returns a :term:`loader` (discussed later) or returns ``None``.
+* If the module name is followed by :keyword:`as`, then the name
+ following :keyword:`as` is bound directly to the imported module.
+* If no other name is specified, and the module being imported is a top
+ level module, the module's name is bound in the local namespace as a
+ reference to the imported module
+* If the module being imported is *not* a top level module, then the name
+ of the top level package that contains the module is bound in the local
+ namespace as a reference to the top level package. The imported module
+ must be accessed using its fully qualified name rather than directly
-.. index::
- single: sys.path_hooks
- single: sys.path_importer_cache
- single: sys.path
-
-If none of the finders on :data:`sys.meta_path` are able to find the module
-then some implicitly defined finders are queried. Implementations of Python
-vary in what implicit meta path finders are defined. The one they all do
-define, though, is one that handles :data:`sys.path_hooks`,
-:data:`sys.path_importer_cache`, and :data:`sys.path`.
-
-The implicit finder searches for the requested module in the "paths" specified
-in one of two places ("paths" do not have to be file system paths). If the
-module being imported is supposed to be contained within a package then the
-second argument passed to :meth:`find_module`, :attr:`__path__` on the parent
-package, is used as the source of paths. If the module is not contained in a
-package then :data:`sys.path` is used as the source of paths.
-
-Once the source of paths is chosen it is iterated over to find a finder that
-can handle that path. The dict at :data:`sys.path_importer_cache` caches
-finders for paths and is checked for a finder. If the path does not have a
-finder cached then :data:`sys.path_hooks` is searched by calling each object in
-the list with a single argument of the path, returning a finder or raises
-:exc:`ImportError`. If a finder is returned then it is cached in
-:data:`sys.path_importer_cache` and then used for that path entry. If no finder
-can be found but the path exists then a value of ``None`` is
-stored in :data:`sys.path_importer_cache` to signify that an implicit,
-file-based finder that handles modules stored as individual files should be
-used for that path. If the path does not exist then a finder which always
-returns ``None`` is placed in the cache for the path.
.. index::
- single: loader
- pair: loader; load_module
- exception: ImportError
-
-If no finder can find the module then :exc:`ImportError` is raised. Otherwise
-some finder returned a loader whose :meth:`load_module` method is called with
-the name of the module to load (see :pep:`302` for the original definition of
-loaders). A loader has several responsibilities to perform on a module it
-loads. First, if the module already exists in :data:`sys.modules` (a
-possibility if the loader is called outside of the import machinery) then it
-is to use that module for initialization and not a new module. But if the
-module does not exist in :data:`sys.modules` then it is to be added to that
-dict before initialization begins. If an error occurs during loading of the
-module and it was added to :data:`sys.modules` it is to be removed from the
-dict. If an error occurs but the module was already in :data:`sys.modules` it
-is left in the dict.
+ pair: name; binding
+ keyword: from
+ exception: ImportError
-.. index::
- single: __name__
- single: __file__
- single: __path__
- single: __package__
- single: __loader__
-
-The loader must set several attributes on the module. :data:`__name__` is to be
-set to the name of the module. :data:`__file__` is to be the "path" to the file
-unless the module is built-in (and thus listed in
-:data:`sys.builtin_module_names`) in which case the attribute is not set.
-If what is being imported is a package then :data:`__path__` is to be set to a
-list of paths to be searched when looking for modules and packages contained
-within the package being imported. :data:`__package__` is optional but should
-be set to the name of package that contains the module or package (the empty
-string is used for module not contained in a package). :data:`__loader__` is
-also optional but should be set to the loader object that is loading the
-module.
+The :keyword:`from` form uses a slightly more complex process:
-.. index::
- exception: ImportError
+#. find the module specified in the :keyword:`from` clause, loading and
+ initializing it if necessary;
+#. for each of the identifiers specified in the :keyword:`import` clauses:
-If an error occurs during loading then the loader raises :exc:`ImportError` if
-some other exception is not already being propagated. Otherwise the loader
-returns the module that was loaded and initialized.
+ #. check if the imported module has an attribute by that name
+ #. if not, attempt to import a submodule with that name and then
+ check the imported module again for that attribute
+ #. if the attribute is not found, :exc:`ImportError` is raised.
+ #. otherwise, a reference to that value is bound in the local namespace,
+ using the name in the :keyword:`as` clause if it is present,
+ otherwise using the attribute name
-When step (1) finishes without raising an exception, step (2) can begin.
+Examples::
-The first form of :keyword:`import` statement binds the module name in the local
-namespace to the module object, and then goes on to import the next identifier,
-if any. If the module name is followed by :keyword:`as`, the name following
-:keyword:`as` is used as the local name for the module.
+ import foo # foo imported and bound locally
+ import foo.bar.baz # foo.bar.baz imported, foo bound locally
+ import foo.bar.baz as fbb # foo.bar.baz imported and bound as fbb
+ from foo.bar import baz # foo.bar.baz imported and bound as baz
+ from foo import attr # foo imported and foo.attr bound as attr
-.. index::
- pair: name; binding
- exception: ImportError
+If the list of identifiers is replaced by a star (``'*'``), all public
+names defined in the module are bound in the local namespace for the scope
+where the :keyword:`import` statement occurs.
-The :keyword:`from` form does not bind the module name: it goes through the list
-of identifiers, looks each one of them up in the module found in step (1), and
-binds the name in the local namespace to the object thus found. As with the
-first form of :keyword:`import`, an alternate local name can be supplied by
-specifying ":keyword:`as` localname". If a name is not found,
-:exc:`ImportError` is raised. If the list of identifiers is replaced by a star
-(``'*'``), all public names defined in the module are bound in the local
-namespace of the :keyword:`import` statement.
+.. index:: single: __all__ (optional module attribute)
+
+The *public names* defined by a module are determined by checking the module's
+namespace for a variable named ``__all__``; if defined, it must be a sequence
+of strings which are names defined or imported by that module. The names
+given in ``__all__`` are all considered public and are required to exist. If
+``__all__`` is not defined, the set of public names includes all names found
+in the module's namespace which do not begin with an underscore character
+(``'_'``). ``__all__`` should contain the entire public API. It is intended
+to avoid accidentally exporting items that are not part of the API (such as
+library modules which were imported and used within the module).
+
+The :keyword:`from` form with ``*`` may only occur in a module scope.
+Attempting to use it in class or function definitions will raise a
+:exc:`SyntaxError`.
.. index:: single: __all__ (optional module attribute)
The *public names* defined by a module are determined by checking the module's
-namespace for a variable named ``__all__``; if defined, it must be a sequence of
-strings which are names defined or imported by that module. The names given in
-``__all__`` are all considered public and are required to exist. If ``__all__``
-is not defined, the set of public names includes all names found in the module's
-namespace which do not begin with an underscore character (``'_'``).
-``__all__`` should contain the entire public API. It is intended to avoid
-accidentally exporting items that are not part of the API (such as library
-modules which were imported and used within the module).
+namespace for a variable named ``__all__``; if defined, it must be a sequence
+of strings which are names defined or imported by that module. The names
+given in ``__all__`` are all considered public and are required to exist. If
+``__all__`` is not defined, the set of public names includes all names found
+in the module's namespace which do not begin with an underscore character
+(``'_'``). ``__all__`` should contain the entire public API. It is intended
+to avoid accidentally exporting items that are not part of the API (such as
+library modules which were imported and used within the module).
The :keyword:`from` form with ``*`` may only occur in a module scope. The wild
card form of import --- ``import *`` --- is only allowed at the module level.
diff --git a/Doc/tools/sphinxext/indexsidebar.html b/Doc/tools/sphinxext/indexsidebar.html
index 748cb91..a0ec32f 100644
--- a/Doc/tools/sphinxext/indexsidebar.html
+++ b/Doc/tools/sphinxext/indexsidebar.html
@@ -3,7 +3,7 @@
<h3>Docs for other versions</h3>
<ul>
<li><a href="http://docs.python.org/2.7/">Python 2.7 (stable)</a></li>
- <li><a href="http://docs.python.org/3.3/">Python 3.3 (in development)</a></li>
+ <li><a href="http://docs.python.org/3.4/">Python 3.4 (in development)</a></li>
<li><a href="http://www.python.org/doc/versions/">Old versions</a></li>
</ul>
diff --git a/Doc/tools/sphinxext/layout.html b/Doc/tools/sphinxext/layout.html
index db4a386..3f68a00 100644
--- a/Doc/tools/sphinxext/layout.html
+++ b/Doc/tools/sphinxext/layout.html
@@ -8,13 +8,70 @@
{% block extrahead %}
<link rel="shortcut icon" type="image/png" href="{{ pathto('_static/py.png', 1) }}" />
{% if not embedded %}<script type="text/javascript" src="{{ pathto('_static/copybutton.js', 1) }}"></script>{% endif %}
+ {% if pagename == 'whatsnew/changelog' %}
+ <script type="text/javascript">
+ $(document).ready(function() {
+ // add the search form and bind the events
+ $('h1').after([
+ '<p>Filter entries by content:',
+ '<input type="text" value="" id="searchbox" style="width: 50%">',
+ '<input type="submit" id="searchbox-submit" value="Filter"></p>'
+ ].join('\n'));
+
+ function dofilter() {
+ try {
+ var query = new RegExp($('#searchbox').val(), 'i');
+ }
+ catch (e) {
+ return; // not a valid regex (yet)
+ }
+ // find headers for the versions (What's new in Python X.Y.Z?)
+ $('#changelog h2').each(function(index1, h2) {
+ var h2_parent = $(h2).parent();
+ var sections_found = 0;
+ // find headers for the sections (Core, Library, etc.)
+ h2_parent.find('h3').each(function(index2, h3) {
+ var h3_parent = $(h3).parent();
+ var entries_found = 0;
+ // find all the entries
+ h3_parent.find('li').each(function(index3, li) {
+ var li = $(li);
+ // check if the query matches the entry
+ if (query.test(li.text())) {
+ li.show();
+ entries_found++;
+ }
+ else {
+ li.hide();
+ }
+ });
+ // if there are entries, show the section, otherwise hide it
+ if (entries_found > 0) {
+ h3_parent.show();
+ sections_found++;
+ }
+ else {
+ h3_parent.hide();
+ }
+ });
+ if (sections_found > 0)
+ h2_parent.show();
+ else
+ h2_parent.hide();
+ });
+ }
+ $('#searchbox').keyup(dofilter);
+ $('#searchbox-submit').click(dofilter);
+ });
+ </script>
+ {% endif %}
{{ super() }}
{% endblock %}
{% block footer %}
<div class="footer">
&copy; <a href="{{ pathto('copyright') }}">Copyright</a> {{ copyright|e }}.
<br />
- The Python Software Foundation is a non-profit corporation.
+ The Python Software Foundation is a non-profit corporation.
<a href="http://www.python.org/psf/donations/">Please donate.</a>
<br />
Last updated on {{ last_updated|e }}.
diff --git a/Doc/tools/sphinxext/patchlevel.py b/Doc/tools/sphinxext/patchlevel.py
index b070d60..bca2eb8 100644
--- a/Doc/tools/sphinxext/patchlevel.py
+++ b/Doc/tools/sphinxext/patchlevel.py
@@ -34,8 +34,7 @@ def get_header_version_info(srcdir):
release = version = '%s.%s' % (d['PY_MAJOR_VERSION'], d['PY_MINOR_VERSION'])
micro = int(d['PY_MICRO_VERSION'])
- if micro != 0:
- release += '.' + str(micro)
+ release += '.' + str(micro)
level = d['PY_RELEASE_LEVEL']
suffixes = {
@@ -51,8 +50,7 @@ def get_header_version_info(srcdir):
def get_sys_version_info():
major, minor, micro, level, serial = sys.version_info
release = version = '%s.%s' % (major, minor)
- if micro:
- release += '.%s' % micro
+ release += '.%s' % micro
if level != 'final':
release += '%s%s' % (level[0], serial)
return version, release
diff --git a/Doc/tools/sphinxext/pyspecific.py b/Doc/tools/sphinxext/pyspecific.py
index 9b2cc47..e8eb703 100644
--- a/Doc/tools/sphinxext/pyspecific.py
+++ b/Doc/tools/sphinxext/pyspecific.py
@@ -5,12 +5,12 @@
Sphinx extension with Python doc-specific markup.
- :copyright: 2008, 2009, 2010 by Georg Brandl.
+ :copyright: 2008, 2009, 2010, 2011, 2012 by Georg Brandl.
:license: Python license.
"""
ISSUE_URI = 'http://bugs.python.org/issue%s'
-SOURCE_URI = 'http://hg.python.org/cpython/file/3.2/%s'
+SOURCE_URI = 'http://hg.python.org/cpython/file/3.3/%s'
from docutils import nodes, utils
from sphinx.util.nodes import split_explicit_title
@@ -174,6 +174,47 @@ class DeprecatedRemoved(Directive):
return ret
+# Support for including Misc/NEWS
+
+import re
+import codecs
+
+issue_re = re.compile('([Ii])ssue #([0-9]+)')
+whatsnew_re = re.compile(r"(?im)^what's new in (.*?)\??$")
+
+class MiscNews(Directive):
+ has_content = False
+ required_arguments = 1
+ optional_arguments = 0
+ final_argument_whitespace = False
+ option_spec = {}
+
+ def run(self):
+ fname = self.arguments[0]
+ source = self.state_machine.input_lines.source(
+ self.lineno - self.state_machine.input_offset - 1)
+ source_dir = path.dirname(path.abspath(source))
+ fpath = path.join(source_dir, fname)
+ self.state.document.settings.record_dependencies.add(fpath)
+ try:
+ fp = codecs.open(fpath, encoding='utf-8')
+ try:
+ content = fp.read()
+ finally:
+ fp.close()
+ except Exception:
+ text = 'The NEWS file is not available.'
+ node = nodes.strong(text, text)
+ return [node]
+ content = issue_re.sub(r'`\1ssue #\2 <http://bugs.python.org/\2>`__',
+ content)
+ content = whatsnew_re.sub(r'\1', content)
+ # remove first 3 lines as they are the main heading
+ lines = ['.. default-role:: obj', ''] + content.splitlines()[3:]
+ self.state_machine.insert_input(lines, fname)
+ return []
+
+
# Support for building "topic help" for pydoc
pydoc_topic_labels = [
@@ -230,11 +271,12 @@ class PydocTopicsBuilder(Builder):
document.append(doctree.ids[labelid])
destination = StringOutput(encoding='utf-8')
writer.write(document, destination)
- self.topics[label] = str(writer.output)
+ self.topics[label] = writer.output.encode('utf-8')
def finish(self):
f = open(path.join(self.outdir, 'topics.py'), 'w')
try:
+ f.write('# -*- coding: utf-8 -*-\n')
f.write('# Autogenerated by Sphinx on %s\n' % asctime())
f.write('topics = ' + pformat(self.topics) + '\n')
finally:
@@ -304,3 +346,4 @@ def setup(app):
app.add_description_unit('2to3fixer', '2to3fixer', '%s (2to3 fixer)')
app.add_directive_to_domain('py', 'decorator', PyDecoratorFunction)
app.add_directive_to_domain('py', 'decoratormethod', PyDecoratorMethod)
+ app.add_directive('miscnews', MiscNews)
diff --git a/Doc/tools/sphinxext/susp-ignored.csv b/Doc/tools/sphinxext/susp-ignored.csv
index 5076aed..2c15a5c 100644
--- a/Doc/tools/sphinxext/susp-ignored.csv
+++ b/Doc/tools/sphinxext/susp-ignored.csv
@@ -1,16 +1,24 @@
c-api/arg,,:ref,"PyArg_ParseTuple(args, ""O|O:ref"", &object, &callback)"
c-api/list,,:high,list[low:high]
c-api/list,,:high,list[low:high] = itemlist
+c-api/sequence,,:i2,del o[i1:i2]
c-api/sequence,,:i2,o[i1:i2]
c-api/sequence,,:i2,o[i1:i2] = v
-c-api/sequence,,:i2,del o[i1:i2]
c-api/unicode,,:end,str[start:end]
+c-api/unicode,,:start,unicode[start:start+length]
+distutils/examples,267,`,This is the description of the ``foobar`` package.
distutils/setupscript,,::,
extending/embedding,,:numargs,"if(!PyArg_ParseTuple(args, "":numargs""))"
-extending/extending,,:set,"if (PyArg_ParseTuple(args, ""O:set_callback"", &temp)) {"
extending/extending,,:myfunction,"PyArg_ParseTuple(args, ""D:myfunction"", &c);"
+extending/extending,,:set,"if (PyArg_ParseTuple(args, ""O:set_callback"", &temp)) {"
extending/newtypes,,:call,"if (!PyArg_ParseTuple(args, ""sss:call"", &arg1, &arg2, &arg3)) {"
extending/windows,,:initspam,/export:initspam
+faq/programming,,:chr,">=4.0) or 1+f(xc,yc,x*x-y*y+xc,2.0*x*y+yc,k-1,f):f(xc,yc,x,y,k,f):chr("
+faq/programming,,::,for x in sequence[::-1]:
+faq/programming,,:reduce,"print((lambda Ru,Ro,Iu,Io,IM,Sx,Sy:reduce(lambda x,y:x+y,map(lambda y,"
+faq/programming,,:reduce,"Sx=Sx,Sy=Sy:reduce(lambda x,y:x+y,map(lambda x,xc=Ru,yc=yc,Ru=Ru,Ro=Ro,"
+faq/windows,229,:EOF,@setlocal enableextensions & python -x %~f0 %* & goto :EOF
+faq/windows,393,:REG,.py :REG_SZ: c:\<path to python>\python.exe -u %s %s
howto/cporting,,:add,"if (!PyArg_ParseTuple(args, ""ii:add_ints"", &one, &two))"
howto/cporting,,:encode,"if (!PyArg_ParseTuple(args, ""O:encode_object"", &myobj))"
howto/cporting,,:say,"if (!PyArg_ParseTuple(args, ""U:say_hello"", &name))"
@@ -22,19 +30,91 @@ howto/curses,,:magenta,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:m
howto/curses,,:red,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and"
howto/curses,,:white,"7:white."
howto/curses,,:yellow,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and"
+howto/ipaddress,,:DB8,>>> ipaddress.ip_address('2001:DB8::1')
+howto/ipaddress,,::,>>> ipaddress.ip_address('2001:DB8::1')
+howto/ipaddress,,:db8,IPv6Address('2001:db8::1')
+howto/ipaddress,,::,IPv6Address('2001:db8::1')
+howto/ipaddress,,:db8,IPv6Address('2001:db8::1')
+howto/ipaddress,,::,IPv6Address('2001:db8::1')
+howto/ipaddress,,::,IPv6Address('::1')
+howto/ipaddress,,:db8,>>> ipaddress.ip_network('2001:db8::0/96')
+howto/ipaddress,,::,>>> ipaddress.ip_network('2001:db8::0/96')
+howto/ipaddress,,:db8,IPv6Network('2001:db8::/96')
+howto/ipaddress,,::,IPv6Network('2001:db8::/96')
+howto/ipaddress,,:db8,IPv6Network('2001:db8::/128')
+howto/ipaddress,,::,IPv6Network('2001:db8::/128')
+howto/ipaddress,,:db8,>>> ipaddress.ip_network('2001:db8::1/96')
+howto/ipaddress,,::,>>> ipaddress.ip_network('2001:db8::1/96')
+howto/ipaddress,,:db8,IPv6Interface('2001:db8::1/96')
+howto/ipaddress,,::,IPv6Interface('2001:db8::1/96')
+howto/ipaddress,,:db8,>>> addr6 = ipaddress.ip_address('2001:db8::1')
+howto/ipaddress,,::,>>> addr6 = ipaddress.ip_address('2001:db8::1')
+howto/ipaddress,,:db8,>>> host6 = ipaddress.ip_interface('2001:db8::1/96')
+howto/ipaddress,,::,>>> host6 = ipaddress.ip_interface('2001:db8::1/96')
+howto/ipaddress,,:db8,IPv6Network('2001:db8::/96')
+howto/ipaddress,,::,IPv6Network('2001:db8::/96')
+howto/ipaddress,,:db8,>>> net6 = ipaddress.ip_network('2001:db8::0/96')
+howto/ipaddress,,::,>>> net6 = ipaddress.ip_network('2001:db8::0/96')
+howto/ipaddress,,:db8,>>> net6 = ipaddress.ip_network('2001:db8::0/96')
+howto/ipaddress,,::,>>> net6 = ipaddress.ip_network('2001:db8::0/96')
+howto/ipaddress,,:ffff,IPv6Address('ffff:ffff:ffff:ffff:ffff:ffff::')
+howto/ipaddress,,::,IPv6Address('ffff:ffff:ffff:ffff:ffff:ffff::')
+howto/ipaddress,,::,IPv6Address('::ffff:ffff')
+howto/ipaddress,,:ffff,IPv6Address('::ffff:ffff')
+howto/ipaddress,,::,IPv6Address('2001::1')
+howto/ipaddress,,::,IPv6Address('2001::ffff:ffff')
+howto/ipaddress,,:ffff,IPv6Address('2001::ffff:ffff')
+howto/ipaddress,,:db8,'2001:db8::'
+howto/ipaddress,,::,'2001:db8::'
+howto/ipaddress,,:db8,'2001:db8::/96'
+howto/ipaddress,,::,'2001:db8::/96'
+howto/logging,,:And,"WARNING:And this, too"
+howto/logging,,:And,"WARNING:root:And this, too"
+howto/logging,,:Doing,INFO:root:Doing something
+howto/logging,,:Finished,INFO:root:Finished
+howto/logging,,:logger,severity:logger name:message
+howto/logging,,:Look,WARNING:root:Look before you leap!
+howto/logging,,:message,severity:logger name:message
+howto/logging,,:root,DEBUG:root:This message should go to the log file
+howto/logging,,:root,INFO:root:Doing something
+howto/logging,,:root,INFO:root:Finished
+howto/logging,,:root,INFO:root:So should this
+howto/logging,,:root,INFO:root:Started
+howto/logging,,:root,"WARNING:root:And this, too"
+howto/logging,,:root,WARNING:root:Look before you leap!
+howto/logging,,:root,WARNING:root:Watch out!
+howto/logging,,:So,INFO:root:So should this
+howto/logging,,:So,INFO:So should this
+howto/logging,,:Started,INFO:root:Started
+howto/logging,,:This,DEBUG:root:This message should go to the log file
+howto/logging,,:This,DEBUG:This message should appear on the console
+howto/logging,,:Watch,WARNING:root:Watch out!
+howto/pyporting,75,::,# make sure to use :: Python *and* :: Python :: 3 so
+howto/pyporting,75,::,"'Programming Language :: Python',"
+howto/pyporting,75,::,'Programming Language :: Python :: 3'
howto/regex,,::,
howto/regex,,:foo,(?:foo)
howto/urllib2,,:example,"for example ""joe@password:example.com"""
howto/webservers,,.. image:,.. image:: http.png
library/audioop,,:ipos,"# factor = audioop.findfactor(in_test[ipos*2:ipos*2+len(out_test)],"
+library/bisect,32,:hi,all(val >= x for val in a[i:hi])
+library/bisect,42,:hi,all(val > x for val in a[i:hi])
+library/configparser,,:home,my_dir: ${Common:home_dir}/twosheds
+library/configparser,,:option,${section:option}
+library/configparser,,:path,python_dir: ${Frameworks:path}/Python/Versions/${Frameworks:Python}
+library/configparser,,:Python,python_dir: ${Frameworks:path}/Python/Versions/${Frameworks:Python}
+library/configparser,,`,# Set the optional `raw` argument of get() to True if you wish to disable
+library/configparser,,:system,path: ${Common:system_dir}/Library/Frameworks/
+library/configparser,,`,# The optional `fallback` argument can be used to provide a fallback value
+library/configparser,,`,# The optional `vars` argument is a dict with members that will take
library/datetime,,:MM,
library/datetime,,:SS,
library/decimal,,:optional,"trailneg:optional trailing minus indicator"
library/difflib,,:ahi,a[alo:ahi]
library/difflib,,:bhi,b[blo:bhi]
+library/difflib,,:i1,
library/difflib,,:i2,
library/difflib,,:j2,
-library/difflib,,:i1,
library/dis,,:TOS,
library/dis,,`,TOS = `TOS`
library/doctest,,`,``factorial`` from the ``example`` module:
@@ -44,96 +124,186 @@ library/functions,,:step,a[start:stop:step]
library/functions,,:stop,"a[start:stop, i]"
library/functions,,:stop,a[start:stop:step]
library/hotshot,,:lineno,"ncalls tottime percall cumtime percall filename:lineno(function)"
-library/httplib,,:port,host:port
-library/imaplib,,:MM,"""DD-Mmm-YYYY HH:MM:SS +HHMM"""
-library/imaplib,,:SS,"""DD-Mmm-YYYY HH:MM:SS +HHMM"""
-library/itertools,,:stop,elements from seq[start:stop:step]
+library/http.client,,:port,host:port
+library/http.cookies,,`,!#$%&'*+-.^_`|~:
+library/imaplib,,:MM,"""DD-Mmm-YYYY HH:MM:SS"
+library/imaplib,,:SS,"""DD-Mmm-YYYY HH:MM:SS"
+library/inspect,,:int,">>> def foo(a, *, b:int, **kwargs):"
+library/inspect,,:int,"'(a, *, b:int, **kwargs)'"
+library/inspect,,:int,'b:int'
+library/ipaddress,,:db8,>>> ipaddress.ip_address('2001:db8::')
+library/ipaddress,,::,>>> ipaddress.ip_address('2001:db8::')
+library/ipaddress,,:db8,IPv6Address('2001:db8::')
+library/ipaddress,,::,IPv6Address('2001:db8::')
+library/ipaddress,,:db8,>>> ipaddress.IPv6Address('2001:db8::1000')
+library/ipaddress,,::,>>> ipaddress.IPv6Address('2001:db8::1000')
+library/ipaddress,,:db8,IPv6Address('2001:db8::1000')
+library/ipaddress,,::,IPv6Address('2001:db8::1000')
+library/ipaddress,,:db8,>>> ipaddress.IPv6Interface('2001:db8::1000/96')
+library/ipaddress,,::,>>> ipaddress.IPv6Interface('2001:db8::1000/96')
+library/ipaddress,,:db8,IPv6Interface('2001:db8::1000/96')
+library/ipaddress,,::,IPv6Interface('2001:db8::1000/96')
+library/ipaddress,,:db8,>>> ipaddress.IPv6Interface('2001:db8::1000/96').network
+library/ipaddress,,::,>>> ipaddress.IPv6Interface('2001:db8::1000/96').network
+library/ipaddress,,:db8,IPv6Network('2001:db8::/96')
+library/ipaddress,,::,IPv6Network('2001:db8::/96')
+library/ipaddress,,:db8,>>> ipaddress.IPv6Network('2001:db8::/96')
+library/ipaddress,,::,>>> ipaddress.IPv6Network('2001:db8::/96')
+library/ipaddress,,:db8,IPv6Network('2001:db8::/96')
+library/ipaddress,,::,IPv6Network('2001:db8::/96')
+library/ipaddress,,:db8,>>> ipaddress.IPv6Network('2001:db8::/96').netmask
+library/ipaddress,,::,>>> ipaddress.IPv6Network('2001:db8::/96').netmask
+library/ipaddress,,:ffff,IPv6Address('ffff:ffff:ffff:ffff:ffff:ffff::')
+library/ipaddress,,::,IPv6Address('ffff:ffff:ffff:ffff:ffff:ffff::')
+library/ipaddress,,:db8,">>> ipaddress.IPv6Network('2001:db8::1000/96', strict=False)"
+library/ipaddress,,::,">>> ipaddress.IPv6Network('2001:db8::1000/96', strict=False)"
+library/ipaddress,,::,"""::abc:7:def"""
+library/ipaddress,,:def,"""::abc:7:def"""
+library/ipaddress,,::,::FFFF/96
+library/ipaddress,,::,2002::/16
+library/ipaddress,,::,2001::/32
+library/ipaddress,,::,>>> str(ipaddress.IPv6Address('::1'))
+library/ipaddress,,::,'::1'
+library/ipaddress,,::,>>> int(ipaddress.IPv6Address('::1'))
+library/ipaddress,,:ff00,ffff:ff00::
+library/ipaddress,,:db00,2001:db00::0/24
+library/ipaddress,,::,2001:db00::0/24
+library/ipaddress,,:db00,2001:db00::0/ffff:ff00::
+library/ipaddress,,::,2001:db00::0/ffff:ff00::
+library/ipaddress,,:ff00,2001:db00::0/ffff:ff00::
library/itertools,,:step,elements from seq[start:stop:step]
+library/itertools,,:stop,elements from seq[start:stop:step]
library/linecache,,:sys,"sys:x:3:3:sys:/dev:/bin/sh"
library/logging,,:And,
+library/logging,,:Doing,INFO:root:Doing something
+library/logging,,:Finished,INFO:root:Finished
+library/logging,,:logger,severity:logger name:message
+library/logging,,:Look,WARNING:root:Look before you leap!
+library/logging,,:message,severity:logger name:message
library/logging,,:package1,
library/logging,,:package2,
+library/logging,,:port,host:port
library/logging,,:root,
+library/logging,,:So,INFO:root:So should this
+library/logging,,:So,INFO:So should this
+library/logging,,:Started,INFO:root:Started
library/logging,,:This,
-library/logging,,:port,host:port
+library/logging,,:Watch,WARNING:root:Watch out!
+library/logging.handlers,,:port,host:port
library/mmap,,:i2,obj[i1:i2]
-library/multiprocessing,,:queue,">>> QueueManager.register('get_queue', callable=lambda:queue)"
-library/multiprocessing,,`,">>> l._callmethod('__getitem__', (20,)) # equiv to `l[20]`"
-library/multiprocessing,,`,">>> l._callmethod('__getslice__', (2, 7)) # equiv to `l[2:7]`"
-library/multiprocessing,,`,# `BaseManager`.
-library/multiprocessing,,`,# `Pool.imap()` (which will save on the amount of code needed anyway).
+library/multiprocessing,,`,# Add more tasks using `put()`
library/multiprocessing,,`,# A test file for the `multiprocessing` package
library/multiprocessing,,`,# A test of `multiprocessing.Pool` class
-library/multiprocessing,,`,# Add more tasks using `put()`
+library/multiprocessing,,`,# `BaseManager`.
+library/multiprocessing,,`,`Cluster` is a subclass of `SyncManager` so it allows creation of
library/multiprocessing,,`,# create server for a `HostManager` object
library/multiprocessing,,`,# Depends on `multiprocessing` package -- tested with `processing-0.60`
+library/multiprocessing,,`,`hostname` gives the name of the host. If hostname is not
library/multiprocessing,,`,# in the original order then consider using `Pool.map()` or
+library/multiprocessing,,`,">>> l._callmethod('__getitem__', (20,)) # equiv to `l[20]`"
+library/multiprocessing,,`,">>> l._callmethod('__getslice__', (2, 7)) # equiv to `l[2:7]`"
library/multiprocessing,,`,# Not sure if we should synchronize access to `socket.accept()` method by
library/multiprocessing,,`,# object. (We import `multiprocessing.reduction` to enable this pickling.)
+library/multiprocessing,,`,# `Pool.imap()` (which will save on the amount of code needed anyway).
+library/multiprocessing,,:queue,">>> QueueManager.register('get_queue', callable=lambda:queue)"
library/multiprocessing,,`,# register the Foo class; make `f()` and `g()` accessible via proxy
library/multiprocessing,,`,# register the Foo class; make `g()` and `_h()` accessible via proxy
library/multiprocessing,,`,# register the generator function baz; use `GeneratorProxy` to make proxies
-library/multiprocessing,,`,`Cluster` is a subclass of `SyncManager` so it allows creation of
-library/multiprocessing,,`,`hostname` gives the name of the host. If hostname is not
library/multiprocessing,,`,`slots` is used to specify the number of slots for processes on
+library/nntplib,,:bytes,:bytes
+library/nntplib,,:bytes,"['xref', 'from', ':lines', ':bytes', 'references', 'date', 'message-id', 'subject']"
+library/nntplib,,:lines,:lines
+library/nntplib,,:lines,"['xref', 'from', ':lines', ':bytes', 'references', 'date', 'message-id', 'subject']"
library/optparse,,:len,"del parser.rargs[:len(value)]"
library/os.path,,:foo,c:foo
library/parser,,`,"""Make a function that raises an argument to the exponent `exp`."""
+library/pdb,,:lineno,filename:lineno
+library/pdb,,:lineno,[filename:lineno | bpnumber [bpnumber ...]]
+library/pickle,,:memory,"conn = sqlite3.connect("":memory:"")"
library/posix,,`,"CFLAGS=""`getconf LFS_CFLAGS`"" OPT=""-g -O2 $CFLAGS"""
-library/profile,,:lineno,ncalls tottime percall cumtime percall filename:lineno(function)
+library/pprint,209,::,"'classifiers': ['Development Status :: 4 - Beta',"
+library/pprint,209,::,"'Intended Audience :: Developers',"
+library/pprint,209,::,"'License :: OSI Approved :: MIT License',"
+library/pprint,209,::,"'Natural Language :: English',"
+library/pprint,209,::,"'Operating System :: OS Independent',"
+library/pprint,209,::,"'Programming Language :: Python',"
+library/pprint,209,::,"'Programming Language :: Python :: 2',"
+library/pprint,209,::,"'Programming Language :: Python :: 2.6',"
+library/pprint,209,::,"'Programming Language :: Python :: 2.7',"
+library/pprint,209,::,"'Topic :: Software Development :: Libraries',"
+library/pprint,209,::,"'Topic :: Software Development :: Libraries :: Python Modules'],"
library/profile,,:lineno,filename:lineno(function)
+library/profile,,:lineno,ncalls tottime percall cumtime percall filename:lineno(function)
+library/profile,,:lineno,"(sort by filename:lineno),"
library/pyexpat,,:elem1,<py:elem1 />
library/pyexpat,,:py,"xmlns:py = ""http://www.python.org/ns/"">"
library/repr,,`,"return `obj`"
library/smtplib,,:port,"as well as a regular host:port server."
+library/smtplib,,:port,method must support that as well as a regular host:port
+library/socket,,::,"(10, 1, 6, '', ('2001:888:2000:d::a2', 80, 0, 0))]"
library/socket,,::,'5aef:2b::8'
-library/sqlite3,,:memory,
+library/socket,,:can,"return (can_id, can_dlc, data[:can_dlc])"
+library/socket,,:len,fds.fromstring(cmsg_data[:len(cmsg_data) - (len(cmsg_data) % fds.itemsize)])
+library/sqlite3,,:age,"cur.execute(""select * from people where name_last=:who and age=:age"", {""who"": who, ""age"": age})"
library/sqlite3,,:age,"select name_last, age from people where name_last=:who and age=:age"
-library/sqlite3,,:who,"select name_last, age from people where name_last=:who and age=:age"
-library/ssl,,:My,"Organization Name (eg, company) [Internet Widgits Pty Ltd]:My Organization, Inc."
+library/sqlite3,,:memory,
+library/sqlite3,,:who,"cur.execute(""select * from people where name_last=:who and age=:age"", {""who"": who, ""age"": age})"
library/ssl,,:My,"Organizational Unit Name (eg, section) []:My Group"
+library/ssl,,:My,"Organization Name (eg, company) [Internet Widgits Pty Ltd]:My Organization, Inc."
library/ssl,,:myserver,"Common Name (eg, YOUR name) []:myserver.mygroup.myorganization.com"
library/ssl,,:MyState,State or Province Name (full name) [Some-State]:MyState
library/ssl,,:ops,Email Address []:ops@myserver.mygroup.myorganization.com
library/ssl,,:Some,"Locality Name (eg, city) []:Some City"
library/ssl,,:US,Country Name (2 letter code) [AU]:US
+library/stdtypes,,::,>>> a[::-1].tolist()
+library/stdtypes,,::,>>> a[::2].tolist()
+library/stdtypes,,:end,s[start:end]
+library/stdtypes,,::,>>> hash(v[::-2]) == hash(b'abcefg'[::-2])
library/stdtypes,,:len,s[len(s):len(s)]
-library/stdtypes,,:len,s[len(s):len(s)]
-library/string,,:end,s[start:end]
+library/stdtypes,,::,>>> y = m[::2]
+library/stdtypes,,::,>>> z = y[::-2]
library/string,,:end,s[start:end]
-library/subprocess,,`,"output=`mycmd myarg`"
library/subprocess,,`,"output=`dmesg | grep hda`"
+library/subprocess,,`,"output=`mycmd myarg`"
+library/tarfile,,:bz2,
library/tarfile,,:compression,filemode[:compression]
library/tarfile,,:gz,
-library/tarfile,,:bz2,
+library/tarfile,,:xz,'a:xz'
+library/tarfile,,:xz,'r:xz'
+library/tarfile,,:xz,'w:xz'
library/time,,:mm,
library/time,,:ss,
library/turtle,,::,Example::
-library/urllib,,:port,:port
library/urllib2,,:password,"""joe:password@python.org"""
+library/urllib,,:port,:port
+library/urllib.request,,:close,Connection:close
+library/urllib.request,,:lang,"xmlns=""http://www.w3.org/1999/xhtml"" xml:lang=""en"" lang=""en"">\n\n<head>\n"
+library/urllib.request,,:password,"""joe:password@python.org"""
library/uuid,,:uuid,urn:uuid:12345678-1234-5678-1234-567812345678
-library/xmlrpclib,,:pass,http://user:pass@host:port/path
-library/xmlrpclib,,:pass,user:pass
-library/xmlrpclib,,:port,http://user:pass@host:port/path
+library/xmlrpc.client,,:pass,http://user:pass@host:port/path
+library/xmlrpc.client,,:pass,user:pass
+library/xmlrpc.client,,:port,http://user:pass@host:port/path
+license,,`,"``Software''), to deal in the Software without restriction, including"
+license,,`,"THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,"
+license,,`,* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+license,,`,THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+license,,`,* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
license,,`,THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
license,,:zooko,mailto:zooko@zooko.com
-license,,`,THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-reference/datamodel,,:step,a[i:j:step]
reference/datamodel,,:max,
-reference/expressions,,:index,x[index:index]
+reference/datamodel,,:step,a[i:j:step]
reference/expressions,,:datum,{key:datum...}
reference/expressions,,`,`expressions...`
+reference/expressions,,:index,x[index:index]
reference/grammar,,:output,#diagram:output
reference/grammar,,:rules,#diagram:rules
-reference/grammar,,:token,#diagram:token
reference/grammar,,`,'`' testlist1 '`'
-reference/lexical_analysis,,:fileencoding,# vim:fileencoding=<encoding-name>
+reference/grammar,,:token,#diagram:token
reference/lexical_analysis,,`,", : . ` = ;"
-tutorial/datastructures,,:value,key:value pairs within the braces adds initial key:value pairs
+reference/lexical_analysis,,`,$ ? `
+reference/lexical_analysis,,:fileencoding,# vim:fileencoding=<encoding-name>
tutorial/datastructures,,:value,It is also possible to delete a key:value
-tutorial/stdlib2,,:start,"fields = struct.unpack('<IIIHH', data[start:start+16])"
-tutorial/stdlib2,,:start,extra = data[start:start+extra_size]
-tutorial/stdlib2,,:start,filename = data[start:start+filenamesize]
+tutorial/datastructures,,:value,key:value pairs within the braces adds initial key:value pairs
tutorial/stdlib2,,:config,"logging.warning('Warning:config file %s not found', 'server.conf')"
tutorial/stdlib2,,:config,WARNING:root:Warning:config file server.conf not found
tutorial/stdlib2,,:Critical,CRITICAL:root:Critical error -- shutting down
@@ -141,15 +311,16 @@ tutorial/stdlib2,,:Error,ERROR:root:Error occurred
tutorial/stdlib2,,:root,CRITICAL:root:Critical error -- shutting down
tutorial/stdlib2,,:root,ERROR:root:Error occurred
tutorial/stdlib2,,:root,WARNING:root:Warning:config file server.conf not found
+tutorial/stdlib2,,:start,extra = data[start:start+extra_size]
+tutorial/stdlib2,,:start,"fields = struct.unpack('<IIIHH', data[start:start+16])"
+tutorial/stdlib2,,:start,filename = data[start:start+filenamesize]
tutorial/stdlib2,,:Warning,WARNING:root:Warning:config file server.conf not found
-using/cmdline,,:line,file:line: category: message
using/cmdline,,:category,action:message:category:module:line
+using/cmdline,,:errorhandler,:errorhandler
using/cmdline,,:line,action:message:category:module:line
+using/cmdline,,:line,file:line: category: message
using/cmdline,,:message,action:message:category:module:line
using/cmdline,,:module,action:message:category:module:line
-using/cmdline,,:errorhandler,:errorhandler
-using/windows,162,`,`` this fixes syntax highlighting errors in some editors due to the \\\\ hackery
-using/windows,170,`,``
whatsnew/2.0,418,:len,
whatsnew/2.3,,::,
whatsnew/2.3,,:config,
@@ -163,113 +334,38 @@ whatsnew/2.4,,:System,
whatsnew/2.5,,:memory,:memory:
whatsnew/2.5,,:step,[start:stop:step]
whatsnew/2.5,,:stop,[start:stop:step]
-distutils/examples,267,`,This is the description of the ``foobar`` package.
-faq/programming,,:reduce,"print((lambda Ru,Ro,Iu,Io,IM,Sx,Sy:reduce(lambda x,y:x+y,map(lambda y,"
-faq/programming,,:reduce,"Sx=Sx,Sy=Sy:reduce(lambda x,y:x+y,map(lambda x,xc=Ru,yc=yc,Ru=Ru,Ro=Ro,"
-faq/programming,,:chr,">=4.0) or 1+f(xc,yc,x*x-y*y+xc,2.0*x*y+yc,k-1,f):f(xc,yc,x,y,k,f):chr("
-faq/programming,,::,for x in sequence[::-1]:
-faq/windows,229,:EOF,@setlocal enableextensions & python -x %~f0 %* & goto :EOF
-faq/windows,393,:REG,.py :REG_SZ: c:\<path to python>\python.exe -u %s %s
-library/bisect,32,:hi,all(val >= x for val in a[i:hi])
-library/bisect,42,:hi,all(val > x for val in a[i:hi])
-library/http.client,52,:port,host:port
-library/nntplib,,:bytes,:bytes
-library/nntplib,,:lines,:lines
-library/nntplib,,:lines,"['xref', 'from', ':lines', ':bytes', 'references', 'date', 'message-id', 'subject']"
-library/nntplib,,:bytes,"['xref', 'from', ':lines', ':bytes', 'references', 'date', 'message-id', 'subject']"
-library/pickle,,:memory,"conn = sqlite3.connect("":memory:"")"
-library/profile,,:lineno,"(sort by filename:lineno),"
-library/socket,,::,"(10, 1, 6, '', ('2001:888:2000:d::a2', 80, 0, 0))]"
-library/stdtypes,,:end,s[start:end]
-library/stdtypes,,:end,s[start:end]
-library/urllib.request,,:close,Connection:close
-library/urllib.request,,:password,"""joe:password@python.org"""
-library/urllib.request,,:lang,"xmlns=""http://www.w3.org/1999/xhtml"" xml:lang=""en"" lang=""en"">\n\n<head>\n"
-library/xmlrpc.client,103,:pass,http://user:pass@host:port/path
-library/xmlrpc.client,103,:port,http://user:pass@host:port/path
-library/xmlrpc.client,103,:pass,user:pass
-license,,`,* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
-license,,`,* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
-license,,`,"``Software''), to deal in the Software without restriction, including"
-license,,`,"THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,"
-reference/lexical_analysis,704,`,$ ? `
+whatsnew/2.7,1619,::,"ParseResult(scheme='http', netloc='[1080::8:800:200C:417A]',"
+whatsnew/2.7,1619,::,>>> urlparse.urlparse('http://[1080::8:800:200C:417A]/foo')
whatsnew/2.7,735,:Sunday,'2009:4:Sunday'
-whatsnew/2.7,862,::,"export PYTHONWARNINGS=all,error:::Cookie:0"
whatsnew/2.7,862,:Cookie,"export PYTHONWARNINGS=all,error:::Cookie:0"
-whatsnew/2.7,1619,::,>>> urlparse.urlparse('http://[1080::8:800:200C:417A]/foo')
-whatsnew/2.7,1619,::,"ParseResult(scheme='http', netloc='[1080::8:800:200C:417A]',"
-library/configparser,,`,# Set the optional `raw` argument of get() to True if you wish to disable
-library/configparser,,`,# The optional `vars` argument is a dict with members that will take
-library/configparser,,`,# The optional `fallback` argument can be used to provide a fallback value
-library/configparser,,:option,${section:option}
-library/configparser,,:system,path: ${Common:system_dir}/Library/Frameworks/
-library/configparser,,:home,my_dir: ${Common:home_dir}/twosheds
-library/configparser,,:path,python_dir: ${Frameworks:path}/Python/Versions/${Frameworks:Python}
-library/configparser,,:Python,python_dir: ${Frameworks:path}/Python/Versions/${Frameworks:Python}
-library/pdb,,:lineno,[filename:lineno | bpnumber [bpnumber ...]]
-library/pdb,,:lineno,filename:lineno
-library/logging,,:Watch,WARNING:root:Watch out!
-library/logging,,:So,INFO:root:So should this
-library/logging,,:Started,INFO:root:Started
-library/logging,,:Doing,INFO:root:Doing something
-library/logging,,:Finished,INFO:root:Finished
-library/logging,,:Look,WARNING:root:Look before you leap!
-library/logging,,:So,INFO:So should this
-library/logging,,:logger,severity:logger name:message
-library/logging,,:message,severity:logger name:message
-whatsnew/3.2,,:directory,... ${buildout:directory}/downloads/dist
-whatsnew/3.2,,:location,... zope9-location = ${zope9:location}
-whatsnew/3.2,,:prefix,... zope-conf = ${custom:prefix}/etc/zope.conf
-howto/logging,,:root,WARNING:root:Watch out!
-howto/logging,,:Watch,WARNING:root:Watch out!
-howto/logging,,:root,DEBUG:root:This message should go to the log file
-howto/logging,,:This,DEBUG:root:This message should go to the log file
-howto/logging,,:root,INFO:root:So should this
-howto/logging,,:So,INFO:root:So should this
-howto/logging,,:root,"WARNING:root:And this, too"
-howto/logging,,:And,"WARNING:root:And this, too"
-howto/logging,,:root,INFO:root:Started
-howto/logging,,:Started,INFO:root:Started
-howto/logging,,:root,INFO:root:Doing something
-howto/logging,,:Doing,INFO:root:Doing something
-howto/logging,,:root,INFO:root:Finished
-howto/logging,,:Finished,INFO:root:Finished
-howto/logging,,:root,WARNING:root:Look before you leap!
-howto/logging,,:Look,WARNING:root:Look before you leap!
-howto/logging,,:This,DEBUG:This message should appear on the console
-howto/logging,,:So,INFO:So should this
-howto/logging,,:And,"WARNING:And this, too"
-howto/logging,,:logger,severity:logger name:message
-howto/logging,,:message,severity:logger name:message
-library/logging.handlers,,:port,host:port
-library/imaplib,116,:MM,"""DD-Mmm-YYYY HH:MM:SS"
-library/imaplib,116,:SS,"""DD-Mmm-YYYY HH:MM:SS"
-whatsnew/3.2,,::,"$ export PYTHONWARNINGS='ignore::RuntimeWarning::,once::UnicodeWarning::'"
-howto/pyporting,75,::,# make sure to use :: Python *and* :: Python :: 3 so
-howto/pyporting,75,::,"'Programming Language :: Python',"
-howto/pyporting,75,::,'Programming Language :: Python :: 3'
-whatsnew/3.2,,:gz,">>> with tarfile.open(name='myarchive.tar.gz', mode='w:gz') as tf:"
-whatsnew/3.2,,:directory,${buildout:directory}/downloads/dist
-whatsnew/3.2,,:location,zope9-location = ${zope9:location}
-whatsnew/3.2,,:prefix,zope-conf = ${custom:prefix}/etc/zope.conf
-whatsnew/3.2,,:beef,>>> urllib.parse.urlparse('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/')
-whatsnew/3.2,,:cafe,>>> urllib.parse.urlparse('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/')
+whatsnew/2.7,862,::,"export PYTHONWARNINGS=all,error:::Cookie:0"
+whatsnew/3.2,,:affe,"netloc='[dead:beef:cafe:5417:affe:8FA3:deaf:feed]',"
whatsnew/3.2,,:affe,>>> urllib.parse.urlparse('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/')
-whatsnew/3.2,,:deaf,>>> urllib.parse.urlparse('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/')
-whatsnew/3.2,,:feed,>>> urllib.parse.urlparse('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/')
whatsnew/3.2,,:beef,"netloc='[dead:beef:cafe:5417:affe:8FA3:deaf:feed]',"
+whatsnew/3.2,,:beef,>>> urllib.parse.urlparse('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/')
whatsnew/3.2,,:cafe,"netloc='[dead:beef:cafe:5417:affe:8FA3:deaf:feed]',"
-whatsnew/3.2,,:affe,"netloc='[dead:beef:cafe:5417:affe:8FA3:deaf:feed]',"
+whatsnew/3.2,,:cafe,>>> urllib.parse.urlparse('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/')
whatsnew/3.2,,:deaf,"netloc='[dead:beef:cafe:5417:affe:8FA3:deaf:feed]',"
+whatsnew/3.2,,:deaf,>>> urllib.parse.urlparse('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/')
+whatsnew/3.2,,:directory,... ${buildout:directory}/downloads/dist
+whatsnew/3.2,,:directory,${buildout:directory}/downloads/dist
+whatsnew/3.2,,::,"$ export PYTHONWARNINGS='ignore::RuntimeWarning::,once::UnicodeWarning::'"
whatsnew/3.2,,:feed,"netloc='[dead:beef:cafe:5417:affe:8FA3:deaf:feed]',"
-library/pprint,209,::,"'classifiers': ['Development Status :: 4 - Beta',"
-library/pprint,209,::,"'Intended Audience :: Developers',"
-library/pprint,209,::,"'License :: OSI Approved :: MIT License',"
-library/pprint,209,::,"'Natural Language :: English',"
-library/pprint,209,::,"'Operating System :: OS Independent',"
-library/pprint,209,::,"'Programming Language :: Python',"
-library/pprint,209,::,"'Programming Language :: Python :: 2',"
-library/pprint,209,::,"'Programming Language :: Python :: 2.6',"
-library/pprint,209,::,"'Programming Language :: Python :: 2.7',"
-library/pprint,209,::,"'Topic :: Software Development :: Libraries',"
-library/pprint,209,::,"'Topic :: Software Development :: Libraries :: Python Modules'],"
+whatsnew/3.2,,:feed,>>> urllib.parse.urlparse('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/')
+whatsnew/3.2,,:gz,">>> with tarfile.open(name='myarchive.tar.gz', mode='w:gz') as tf:"
+whatsnew/3.2,,:location,... zope9-location = ${zope9:location}
+whatsnew/3.2,,:location,zope9-location = ${zope9:location}
+whatsnew/3.2,,:prefix,... zope-conf = ${custom:prefix}/etc/zope.conf
+whatsnew/3.2,,:prefix,zope-conf = ${custom:prefix}/etc/zope.conf
+whatsnew/news,,:platform,:platform:
+whatsnew/news,,:password,: Unquote before b64encoding user:password during Basic
+whatsnew/news,,:close,Connection:close header.
+whatsnew/news,,:PythonCmd,"With Tk < 8.5 _tkinter.c:PythonCmd() raised UnicodeDecodeError, caused"
+whatsnew/news,,:close,: Connection:close header is sent by requests using URLOpener
+whatsnew/news,,::,": Fix FTP tests for IPv6, bind to ""::1"" instead of ""localhost""."
+whatsnew/news,,:test,: test_subprocess:test_leaking_fds_on_error no longer gives a
+whatsnew/news,,:test,: Fix test_posix:test_getgroups failure under Solaris. Patch
+whatsnew/news,,:Olimit,Drop -OPT:Olimit compiler option.
+whatsnew/news,,:MAXYEAR,timedelta from date or datetime falls outside of the MINYEAR:MAXYEAR range.
+whatsnew/news,,:bz2,with mode 'r' or 'r:bz2' and a fileobj argument that contained no data or
+whatsnew/news,,:db2,: Add configure option --with-dbmliborder=db1:db2:... to specify
diff --git a/Doc/tutorial/classes.rst b/Doc/tutorial/classes.rst
index cff2710..3283e54 100644
--- a/Doc/tutorial/classes.rst
+++ b/Doc/tutorial/classes.rst
@@ -184,7 +184,6 @@ The output of the example code is:
.. code-block:: none
-
After local assignment: test spam
After nonlocal assignment: nonlocal spam
After global assignment: nonlocal spam
@@ -698,9 +697,9 @@ example, the following code will print B, C, D in that order::
class D(C):
pass
- for c in [B, C, D]:
+ for cls in [B, C, D]:
try:
- raise c()
+ raise cls()
except D:
print("D")
except C:
diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst
index e008dd8..36abc9c 100644
--- a/Doc/tutorial/datastructures.rst
+++ b/Doc/tutorial/datastructures.rst
@@ -19,13 +19,13 @@ objects:
.. method:: list.append(x)
:noindex:
- Add an item to the end of the list; equivalent to ``a[len(a):] = [x]``.
+ Add an item to the end of the list. Equivalent to ``a[len(a):] = [x]``.
.. method:: list.extend(L)
:noindex:
- Extend the list by appending all the items in the given list; equivalent to
+ Extend the list by appending all the items in the given list. Equivalent to
``a[len(a):] = L``.
@@ -40,8 +40,8 @@ objects:
.. method:: list.remove(x)
:noindex:
- Remove the first item from the list whose value is *x*. It is an error if there
- is no such item.
+ Remove the first item from the list whose value is *x*. It is an error if
+ there is no such item.
.. method:: list.pop([i])
@@ -70,13 +70,14 @@ objects:
.. method:: list.sort()
:noindex:
- Sort the items of the list, in place.
+ Sort the items of the list in place.
.. method:: list.reverse()
:noindex:
- Reverse the elements of the list, in place.
+ Reverse the elements of the list in place.
+
An example that uses most of the list methods::
@@ -99,6 +100,10 @@ An example that uses most of the list methods::
>>> a
[-1, 1, 66.25, 333, 333, 1234.5]
+You might have noticed that methods like ``insert``, ``remove`` or ``sort`` that
+modify the list have no return value printed -- they return ``None``. [1]_ This
+is a design principle for all mutable data structures in Python.
+
.. _tut-lists-as-stacks:
@@ -480,7 +485,7 @@ using a non-existent key.
Performing ``list(d.keys())`` on a dictionary returns a list of all the keys
used in the dictionary, in arbitrary order (if you want it sorted, just use
-``sorted(d.keys())`` instead). [1]_ To check whether a single key is in the
+``sorted(d.keys())`` instead). [2]_ To check whether a single key is in the
dictionary, use the :keyword:`in` keyword.
Here is a small example using a dictionary::
@@ -677,6 +682,9 @@ interpreter will raise a :exc:`TypeError` exception.
.. rubric:: Footnotes
-.. [1] Calling ``d.keys()`` will return a :dfn:`dictionary view` object. It
+.. [1] Other languages may return the mutated object, which allows method
+ chaining, such as ``d->insert("a")->remove("b")->sort();``.
+
+.. [2] Calling ``d.keys()`` will return a :dfn:`dictionary view` object. It
supports operations like membership test and iteration, but its contents
are not independent of the original dictionary -- it is only a *view*.
diff --git a/Doc/tutorial/interpreter.rst b/Doc/tutorial/interpreter.rst
index d61dafc..cdc2bf2 100644
--- a/Doc/tutorial/interpreter.rst
+++ b/Doc/tutorial/interpreter.rst
@@ -10,13 +10,13 @@ Using the Python Interpreter
Invoking the Interpreter
========================
-The Python interpreter is usually installed as :file:`/usr/local/bin/python3.2`
+The Python interpreter is usually installed as :file:`/usr/local/bin/python3.3`
on those machines where it is available; putting :file:`/usr/local/bin` in your
Unix shell's search path makes it possible to start it by typing the command:
.. code-block:: text
- python3.2
+ python3.3
to the shell. [#]_ Since the choice of the directory where the interpreter lives
is an installation option, other places are possible; check with your local
@@ -24,11 +24,11 @@ Python guru or system administrator. (E.g., :file:`/usr/local/python` is a
popular alternative location.)
On Windows machines, the Python installation is usually placed in
-:file:`C:\\Python32`, though you can change this when you're running the
+:file:`C:\\Python33`, though you can change this when you're running the
installer. To add this directory to your path, you can type the following
command into the command prompt in a DOS box::
- set path=%path%;C:\python32
+ set path=%path%;C:\python33
Typing an end-of-file character (:kbd:`Control-D` on Unix, :kbd:`Control-Z` on
Windows) at the primary prompt causes the interpreter to exit with a zero exit
@@ -95,8 +95,8 @@ with the *secondary prompt*, by default three dots (``...``). The interpreter
prints a welcome message stating its version number and a copyright notice
before printing the first prompt::
- $ python3.2
- Python 3.2.3 (default, May 3 2012, 15:54:42)
+ $ python3.3
+ Python 3.3 (default, Sep 24 2012, 09:25:04)
[GCC 4.6.3] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>>
@@ -149,7 +149,7 @@ Executable Python Scripts
On BSD'ish Unix systems, Python scripts can be made directly executable, like
shell scripts, by putting the line ::
- #! /usr/bin/env python3.2
+ #! /usr/bin/env python3.3
(assuming that the interpreter is on the user's :envvar:`PATH`) at the beginning
of the script and giving the file an executable mode. The ``#!`` must be the
diff --git a/Doc/tutorial/introduction.rst b/Doc/tutorial/introduction.rst
index b6d94ac..c07a668 100644
--- a/Doc/tutorial/introduction.rst
+++ b/Doc/tutorial/introduction.rst
@@ -390,7 +390,7 @@ The built-in function :func:`len` returns the length of a string::
.. seealso::
- :ref:`typesseq`
+ :ref:`textseq`
Strings are examples of *sequence types*, and support the common
operations supported by such types.
diff --git a/Doc/tutorial/stdlib.rst b/Doc/tutorial/stdlib.rst
index 128e6a6..b5771f6 100644
--- a/Doc/tutorial/stdlib.rst
+++ b/Doc/tutorial/stdlib.rst
@@ -15,7 +15,7 @@ operating system::
>>> import os
>>> os.getcwd() # Return the current working directory
- 'C:\\Python31'
+ 'C:\\Python33'
>>> os.chdir('/server/accesslogs') # Change current working directory
>>> os.system('mkdir today') # Run the command mkdir in the system shell
0
diff --git a/Doc/tutorial/stdlib2.rst b/Doc/tutorial/stdlib2.rst
index a9ae871..6a48984 100644
--- a/Doc/tutorial/stdlib2.rst
+++ b/Doc/tutorial/stdlib2.rst
@@ -141,7 +141,9 @@ standard size and in little-endian byte order::
import struct
- data = open('myfile.zip', 'rb').read()
+ with open('myfile.zip', 'rb') as f:
+ data = f.read()
+
start = 0
for i in range(3): # show the first 3 file headers
start += 14
@@ -273,7 +275,7 @@ applications include caching objects that are expensive to create::
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
d['primary'] # entry was automatically removed
- File "C:/python31/lib/weakref.py", line 46, in __getitem__
+ File "C:/python33/lib/weakref.py", line 46, in __getitem__
o = self.data[key]()
KeyError: 'primary'
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index cd4d02f..dbe6c47 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -24,7 +24,7 @@ Command line
When invoking Python, you may specify any of these options::
- python [-bBdEhiORqsSuvVWx?] [-c command | -m module-name | script | - ] [args]
+ python [-bBdEhiOqsSuvVWx?] [-c command | -m module-name | script | - ] [args]
The most common use case is, of course, a simple invocation of a script::
@@ -229,23 +229,22 @@ Miscellaneous options
.. cmdoption:: -R
- Turn on hash randomization, so that the :meth:`__hash__` values of str, bytes
- and datetime objects are "salted" with an unpredictable random value.
- Although they remain constant within an individual Python process, they are
- not predictable between repeated invocations of Python.
+ Kept for compatibility. On Python 3.3 and greater, hash randomization is
+ turned on by default.
- This is intended to provide protection against a denial-of-service caused by
- carefully-chosen inputs that exploit the worst case performance of a dict
- construction, O(n^2) complexity. See
- http://www.ocert.org/advisories/ocert-2011-003.html for details.
+ On previous versions of Python, this option turns on hash randomization,
+ so that the :meth:`__hash__` values of str, bytes and datetime objects
+ are "salted" with an unpredictable random value. Although they remain
+ constant within an individual Python process, they are not predictable
+ between repeated invocations of Python.
- Changing hash values affects the order in which keys are retrieved from a
- dict. Although Python has never made guarantees about this ordering (and it
- typically varies between 32-bit and 64-bit builds), enough real-world code
- implicitly relies on this non-guaranteed behavior that the randomization is
- disabled by default.
+ Hash randomization is intended to provide protection against a
+ denial-of-service caused by carefully-chosen inputs that exploit the worst
+ case performance of a dict construction, O(n^2) complexity. See
+ http://www.ocert.org/advisories/ocert-2011-003.html for details.
- See also :envvar:`PYTHONHASHSEED`.
+ :envvar:`PYTHONHASHSEED` allows you to set a fixed value for the hash
+ seed secret.
.. versionadded:: 3.2.3
@@ -263,7 +262,9 @@ Miscellaneous options
.. cmdoption:: -S
Disable the import of the module :mod:`site` and the site-dependent
- manipulations of :data:`sys.path` that it entails.
+ manipulations of :data:`sys.path` that it entails. Also disable these
+ manipulations if :mod:`site` is explicitly imported later (call
+ :func:`site.main` if you want them to be triggered).
.. cmdoption:: -u
@@ -475,7 +476,7 @@ conflict.
.. envvar:: PYTHONCASEOK
If this is set, Python ignores case in :keyword:`import` statements. This
- only works on Windows, OS X, and OS/2.
+ only works on Windows and OS X.
.. envvar:: PYTHONDONTWRITEBYTECODE
@@ -487,9 +488,8 @@ conflict.
.. envvar:: PYTHONHASHSEED
- If this variable is set to ``random``, the effect is the same as specifying
- the :option:`-R` option: a random value is used to seed the hashes of str,
- bytes and datetime objects.
+ If this variable is not set or set to ``random``, a random value is used
+ to seed the hashes of str, bytes and datetime objects.
If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a fixed
seed for generating the hash() of the types covered by the hash
@@ -500,8 +500,7 @@ conflict.
values.
The integer must be a decimal number in the range [0,4294967295]. Specifying
- the value 0 will lead to the same hash values as when hash randomization is
- disabled.
+ the value 0 will disable hash randomization.
.. versionadded:: 3.2.3
@@ -531,8 +530,8 @@ conflict.
Defines the :data:`user base directory <site.USER_BASE>`, which is used to
compute the path of the :data:`user site-packages directory <site.USER_SITE>`
- and :ref:`Distutils installation paths <inst-alt-install-user>` for ``python
- setup.py install --user``.
+ and :ref:`Distutils installation paths <inst-alt-install-user>` for
+ ``python setup.py install --user``.
.. seealso::
@@ -551,6 +550,14 @@ conflict.
separated string, it is equivalent to specifying :option:`-W` multiple
times.
+.. envvar:: PYTHONFAULTHANDLER
+
+ If this environment variable is set, :func:`faulthandler.enable` is called
+ at startup: install a handler for :const:`SIGSEGV`, :const:`SIGFPE`,
+ :const:`SIGABRT`, :const:`SIGBUS` and :const:`SIGILL` signals to dump the
+ Python traceback. This is equivalent to the :option:`-X` ``faulthandler``
+ option.
+
Debug-mode variables
~~~~~~~~~~~~~~~~~~~~
diff --git a/Doc/using/index.rst b/Doc/using/index.rst
index 1201153..502afa9 100644
--- a/Doc/using/index.rst
+++ b/Doc/using/index.rst
@@ -17,4 +17,4 @@ interpreter and things that make working with Python easier.
unix.rst
windows.rst
mac.rst
-
+ scripts.rst
diff --git a/Doc/using/scripts.rst b/Doc/using/scripts.rst
new file mode 100644
index 0000000..2c87416
--- /dev/null
+++ b/Doc/using/scripts.rst
@@ -0,0 +1,12 @@
+.. _tools-and-scripts:
+
+Additional Tools and Scripts
+============================
+
+.. _scripts-pyvenv:
+
+pyvenv - Creating virtual environments
+--------------------------------------
+
+.. include:: venv-create.inc
+
diff --git a/Doc/using/venv-create.inc b/Doc/using/venv-create.inc
new file mode 100644
index 0000000..5fdbc9b
--- /dev/null
+++ b/Doc/using/venv-create.inc
@@ -0,0 +1,85 @@
+Creation of :ref:`virtual environments <venv-def>` is done by executing the
+``pyvenv`` script::
+
+ pyvenv /path/to/new/virtual/environment
+
+Running this command creates the target directory (creating any parent
+directories that don't exist already) and places a ``pyvenv.cfg`` file in it
+with a ``home`` key pointing to the Python installation the command was run
+from. It also creates a ``bin`` (or ``Scripts`` on Windows) subdirectory
+containing a copy of the ``python`` binary (or binaries, in the case of
+Windows). It also creates an (initially empty) ``lib/pythonX.Y/site-packages``
+subdirectory (on Windows, this is ``Lib\site-packages``).
+
+.. highlight:: none
+
+On Windows, you may have to invoke the ``pyvenv`` script as follows, if you
+don't have the relevant PATH and PATHEXT settings::
+
+ c:\Temp>c:\Python33\python c:\Python33\Tools\Scripts\pyvenv.py myenv
+
+or equivalently::
+
+ c:\Temp>c:\Python33\python -m venv myenv
+
+The command, if run with ``-h``, will show the available options::
+
+ usage: pyvenv [-h] [--system-site-packages] [--symlinks] [--clear]
+ [--upgrade] ENV_DIR [ENV_DIR ...]
+
+ Creates virtual Python environments in one or more target directories.
+
+ positional arguments:
+ ENV_DIR A directory to create the environment in.
+
+ optional arguments:
+ -h, --help show this help message and exit
+ --system-site-packages Give access to the global site-packages dir to the
+ virtual environment.
+ --symlinks Try to use symlinks rather than copies, when symlinks
+ are not the default for the platform.
+ --clear Delete the environment directory if it already exists.
+ If not specified and the directory exists, an error is
+ raised.
+ --upgrade Upgrade the environment directory to use this version
+ of Python, assuming Python has been upgraded in-place.
+
+If the target directory already exists an error will be raised, unless
+the ``--clear`` or ``--upgrade`` option was provided.
+
+The created ``pyvenv.cfg`` file also includes the
+``include-system-site-packages`` key, set to ``true`` if ``venv`` is
+run with the ``--system-site-packages`` option, ``false`` otherwise.
+
+Multiple paths can be given to ``pyvenv``, in which case an identical
+virtualenv will be created, according to the given options, at each
+provided path.
+
+Once a venv has been created, it can be "activated" using a script in the
+venv's binary directory. The invocation of the script is platform-specific: on
+a Posix platform, you would typically do::
+
+ $ source <venv>/bin/activate
+
+whereas on Windows, you might do::
+
+ C:\> <venv>/Scripts/activate
+
+if you are using the ``cmd.exe`` shell, or perhaps::
+
+ PS C:\> <venv>/Scripts/Activate.ps1
+
+if you use PowerShell.
+
+You don't specifically *need* to activate an environment; activation just
+prepends the venv's binary directory to your path, so that "python" invokes the
+venv's Python interpreter and you can run installed scripts without having to
+use their full path. However, all scripts installed in a venv should be
+runnable without activating it, and run with the venv's Python automatically.
+
+You can deactivate a venv by typing "deactivate" in your shell. The exact
+mechanism is platform-specific: for example, the Bash activation script defines
+a "deactivate" function, whereas on Windows there are separate scripts called
+``deactivate.bat`` and ``Deactivate.ps1`` which are installed when the venv is
+created.
+
diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst
index 742a290..ae25215 100644
--- a/Doc/using/windows.rst
+++ b/Doc/using/windows.rst
@@ -82,6 +82,8 @@ In order to run Python flawlessly, you might have to change certain environment
settings in Windows.
+.. _setting-envvars:
+
Excursus: Setting environment variables
---------------------------------------
@@ -130,21 +132,33 @@ Consult :command:`set /?` for details on this behaviour.
Setting Environment variables, Louis J. Farrugia
+.. _windows-path-mod:
+
Finding the Python executable
-----------------------------
+.. versionchanged:: 3.3
+
Besides using the automatically created start menu entry for the Python
-interpreter, you might want to start Python in the DOS prompt. To make this
-work, you need to set your :envvar:`%PATH%` environment variable to include the
-directory of your Python distribution, delimited by a semicolon from other
-entries. An example variable could look like this (assuming the first two
-entries are Windows' default)::
+interpreter, you might want to start Python in the command prompt. As of
+Python 3.3, the installer has an option to set that up for you.
- C:\WINDOWS\system32;C:\WINDOWS;C:\Python25
+At the "Customize Python 3.3" screen, an option called
+"Add python.exe to search path" can be enabled to have the installer place
+your installation into the :envvar:`%PATH%`. This allows you to type
+:command:`python` to run the interpreter. Thus, you can also execute your
+scripts with command line options, see :ref:`using-on-cmdline` documentation.
-Typing :command:`python` on your command prompt will now fire up the Python
-interpreter. Thus, you can also execute your scripts with command line options,
-see :ref:`using-on-cmdline` documentation.
+If you don't enable this option at install time, you can always re-run the
+installer to choose it.
+
+The alternative is manually modifying the :envvar:`%PATH%` using the
+directions in :ref:`setting-envvars`. You need to set your :envvar:`%PATH%`
+environment variable to include the directory of your Python distribution,
+delimited by a semicolon from other entries. An example variable could look
+like this (assuming the first two entries are Windows' default)::
+
+ C:\WINDOWS\system32;C:\WINDOWS;C:\Python33
Finding modules
@@ -203,13 +217,19 @@ The end result of all this is:
Executing scripts
-----------------
-Python scripts (files with the extension ``.py``) will be executed by
-:program:`python.exe` by default. This executable opens a terminal, which stays
-open even if the program uses a GUI. If you do not want this to happen, use the
-extension ``.pyw`` which will cause the script to be executed by
-:program:`pythonw.exe` by default (both executables are located in the top-level
-of your Python installation directory). This suppresses the terminal window on
-startup.
+As of Python 3.3, Python includes a launcher which facilitates running Python
+scripts. See :ref:`launcher` for more information.
+
+Executing scripts without the Python launcher
+---------------------------------------------
+
+Without the Python launcher installed, Python scripts (files with the extension
+``.py``) will be executed by :program:`python.exe` by default. This executable
+opens a terminal, which stays open even if the program uses a GUI. If you do
+not want this to happen, use the extension ``.pyw`` which will cause the script
+to be executed by :program:`pythonw.exe` by default (both executables are
+located in the top-level of your Python installation directory). This
+suppresses the terminal window on startup.
You can also make all ``.py`` scripts execute with :program:`pythonw.exe`,
setting this through the usual facilities, for example (might require
@@ -225,6 +245,250 @@ administrative rights):
ftype Python.File=C:\Path\to\pythonw.exe "%1" %*
+.. _launcher:
+
+Python Launcher for Windows
+===========================
+
+.. versionadded:: 3.3
+
+The Python launcher for Windows is a utility which aids in the location and
+execution of different Python versions. It allows scripts (or the
+command-line) to indicate a preference for a specific Python version, and
+will locate and execute that version.
+
+Getting started
+---------------
+
+From the command-line
+^^^^^^^^^^^^^^^^^^^^^
+
+You should ensure the launcher is on your PATH - depending on how it was
+installed it may already be there, but check just in case it is not.
+
+From a command-prompt, execute the following command:
+
+::
+
+ py
+
+You should find that the latest version of Python 2.x you have installed is
+started - it can be exited as normal, and any additional command-line
+arguments specified will be sent directly to Python.
+
+If you have multiple versions of Python 2.x installed (e.g., 2.6 and 2.7) you
+will have noticed that Python 2.7 was started - to launch Python 2.6, try the
+command:
+
+::
+
+ py -2.6
+
+If you have a Python 3.x installed, try the command:
+
+::
+
+ py -3
+
+You should find the latest version of Python 3.x starts.
+
+From a script
+^^^^^^^^^^^^^
+
+Let's create a test Python script - create a file called ``hello.py`` with the
+following contents:
+
+::
+
+ #! python
+ import sys
+ sys.stdout.write("hello from Python %s\n" % (sys.version,))
+
+From the directory in which hello.py lives, execute the command:
+
+::
+
+ py hello.py
+
+You should notice the version number of your latest Python 2.x installation
+is printed. Now try changing the first line to be:
+
+::
+
+ #! python3
+
+Re-executing the command should now print the latest Python 3.x information.
+As with the above command-line examples, you can specify a more explicit
+version qualifier. Assuming you have Python 2.6 installed, try changing the
+first line to ``#! python2.6`` and you should find the 2.6 version
+information printed.
+
+From file associations
+^^^^^^^^^^^^^^^^^^^^^^
+
+The launcher should have been associated with Python files (i.e. ``.py``,
+``.pyw``, ``.pyc``, ``.pyo`` files) when it was installed. This means that
+when you double-click on one of these files from Windows explorer the launcher
+will be used, and therefore you can use the same facilities described above to
+have the script specify the version which should be used.
+
+The key benefit of this is that a single launcher can support multiple Python
+versions at the same time depending on the contents of the first line.
+
+Shebang Lines
+-------------
+
+If the first line of a script file starts with ``#!``, it is known as a
+"shebang" line. Linux and other Unix-like operating systems have native
+support for such lines, and they are commonly used on such systems to indicate
+how a script should be executed. This launcher allows the same facilities to be
+used with Python scripts on Windows, and the examples above demonstrate their
+use.
+
+To allow shebang lines in Python scripts to be portable between Unix and
+Windows, this launcher supports a number of 'virtual' commands to specify
+which interpreter to use. The supported virtual commands are:
+
+* ``/usr/bin/env python``
+* ``/usr/bin/python``
+* ``/usr/local/bin/python``
+* ``python``
+
+For example, if the first line of your script starts with
+
+::
+
+ #! /usr/bin/python
+
+The default Python will be located and used. As many Python scripts written
+to work on Unix will already have this line, you should find these scripts can
+be used by the launcher without modification. If you are writing a new script
+on Windows which you hope will be useful on Unix, you should use one of the
+shebang lines starting with ``/usr``.
+
+Arguments in shebang lines
+--------------------------
+
+The shebang lines can also specify additional options to be passed to the
+Python interpreter. For example, if you have a shebang line:
+
+::
+
+ #! /usr/bin/python -v
+
+Then Python will be started with the ``-v`` option.
+
+Customization
+-------------
+
+Customization via INI files
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ Two .ini files will be searched by the launcher - ``py.ini`` in the
+ current user's "application data" directory (i.e. the directory returned
+ by calling the Windows function SHGetFolderPath with CSIDL_LOCAL_APPDATA)
+ and ``py.ini`` in the same directory as the launcher. The same .ini
+ files are used for both the 'console' version of the launcher (i.e.
+ py.exe) and for the 'windows' version (i.e. pyw.exe).
+
+ Customization specified in the "application data" directory will have
+ precedence over the one next to the executable, so a user, who may not
+ have write access to the .ini file next to the launcher, can override
+ commands in that global .ini file.
+
+Customizing default Python versions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In some cases, a version qualifier can be included in a command to dictate
+which version of Python will be used by the command. A version qualifier
+starts with a major version number and can optionally be followed by a period
+('.') and a minor version specifier. If the minor qualifier is specified, it
+may optionally be followed by "-32" to indicate the 32-bit implementation of
+that version be used.
+
+For example, a shebang line of ``#!python`` has no version qualifier, while
+``#!python3`` has a version qualifier which specifies only a major version.
+
+If no version qualifiers are found in a command, the environment variable
+``PY_PYTHON`` can be set to specify the default version qualifier - the default
+value is "2". Note this value could specify just a major version (e.g. "2") or
+a major.minor qualifier (e.g. "2.6"), or even major.minor-32.
+
+If no minor version qualifiers are found, the environment variable
+``PY_PYTHON{major}`` (where ``{major}`` is the current major version qualifier
+as determined above) can be set to specify the full version. If no such option
+is found, the launcher will enumerate the installed Python versions and use
+the latest minor release found for the major version, which is likely,
+although not guaranteed, to be the most recently installed version in that
+family.
+
+On 64-bit Windows with both 32-bit and 64-bit implementations of the same
+(major.minor) Python version installed, the 64-bit version will always be
+preferred. This will be true for both 32-bit and 64-bit implementations of the
+launcher - a 32-bit launcher will prefer to execute a 64-bit Python installation
+of the specified version if available. This is so the behavior of the launcher
+can be predicted knowing only what versions are installed on the PC and
+without regard to the order in which they were installed (i.e., without knowing
+whether a 32 or 64-bit version of Python and corresponding launcher was
+installed last). As noted above, an optional "-32" suffix can be used on a
+version specifier to change this behaviour.
+
+Examples:
+
+* If no relevant options are set, the commands ``python`` and
+ ``python2`` will use the latest Python 2.x version installed and
+ the command ``python3`` will use the latest Python 3.x installed.
+
+* The commands ``python3.1`` and ``python2.7`` will not consult any
+ options at all as the versions are fully specified.
+
+* If ``PY_PYTHON=3``, the commands ``python`` and ``python3`` will both use
+ the latest installed Python 3 version.
+
+* If ``PY_PYTHON=3.1-32``, the command ``python`` will use the 32-bit
+ implementation of 3.1 whereas the command ``python3`` will use the latest
+ installed Python (PY_PYTHON was not considered at all as a major
+ version was specified.)
+
+* If ``PY_PYTHON=3`` and ``PY_PYTHON3=3.1``, the commands
+ ``python`` and ``python3`` will both use specifically 3.1.
+
+In addition to environment variables, the same settings can be configured
+in the .INI file used by the launcher. The section in the INI file is
+called ``[defaults]`` and the key name will be the same as the
+environment variables without the leading ``PY_`` prefix (and note that
+the key names in the INI file are case insensitive.) The contents of
+an environment variable will override things specified in the INI file.
+
+For example:
+
+* Setting ``PY_PYTHON=3.1`` is equivalent to the INI file containing:
+
+::
+
+ [defaults]
+ python=3.1
+
+* Setting ``PY_PYTHON=3`` and ``PY_PYTHON3=3.1`` is equivalent to the INI file
+ containing:
+
+::
+
+ [defaults]
+ python=3
+ python3=3.1
+
+Diagnostics
+-----------
+
+If an environment variable ``PYLAUNCH_DEBUG`` is set (to any value), the
+launcher will print diagnostic information to stderr (i.e. to the console).
+While this information manages to be simultaneously verbose *and* terse, it
+should allow you to see what versions of Python were located, why a
+particular version was chosen and the exact command-line used to execute the
+target Python.
+
+
Additional modules
==================
@@ -341,3 +605,7 @@ Other resources
`A Python for Windows Tutorial <http://www.imladris.com/Scripts/PythonForWindows.html>`_
by Amanda Birmingham, 2004
+ :pep:`397` - Python launcher for Windows
+ The proposal for the launcher to be included in the Python distribution.
+
+
diff --git a/Doc/whatsnew/2.0.rst b/Doc/whatsnew/2.0.rst
index 850e57d..64b908b 100644
--- a/Doc/whatsnew/2.0.rst
+++ b/Doc/whatsnew/2.0.rst
@@ -166,7 +166,7 @@ encoding. Encodings are named by strings, such as ``'ascii'``, ``'utf-8'``,
registering new encodings that are then available throughout a Python program.
If an encoding isn't specified, the default encoding is usually 7-bit ASCII,
though it can be changed for your Python installation by calling the
-:func:`sys.setdefaultencoding(encoding)` function in a customised version of
+``sys.setdefaultencoding(encoding)`` function in a customised version of
:file:`site.py`.
Combining 8-bit and Unicode strings always coerces to Unicode, using the default
@@ -203,7 +203,7 @@ U+0660 is an Arabic number.
The :mod:`codecs` module contains functions to look up existing encodings and
register new ones. Unless you want to implement a new encoding, you'll most
-often use the :func:`codecs.lookup(encoding)` function, which returns a
+often use the ``codecs.lookup(encoding)`` function, which returns a
4-element tuple: ``(encode_func, decode_func, stream_reader, stream_writer)``.
* *encode_func* is a function that takes a Unicode string, and returns a 2-tuple
@@ -600,7 +600,7 @@ Python code is found to be improperly indented.
Changes to Built-in Functions
-----------------------------
-A new built-in, :func:`zip(seq1, seq2, ...)`, has been added. :func:`zip`
+A new built-in, ``zip(seq1, seq2, ...)``, has been added. :func:`zip`
returns a list of tuples where each tuple contains the i-th element from each of
the argument sequences. The difference between :func:`zip` and ``map(None,
seq1, seq2)`` is that :func:`map` pads the sequences with ``None`` if the
@@ -619,7 +619,7 @@ level, serial)`` For example, in a hypothetical 2.0.1beta1, ``sys.version_info``
would be ``(2, 0, 1, 'beta', 1)``. *level* is a string such as ``"alpha"``,
``"beta"``, or ``"final"`` for a final release.
-Dictionaries have an odd new method, :meth:`setdefault(key, default)`, which
+Dictionaries have an odd new method, ``setdefault(key, default)``, which
behaves similarly to the existing :meth:`get` method. However, if the key is
missing, :meth:`setdefault` both returns the value of *default* as :meth:`get`
would do, and also inserts it into the dictionary as the value for *key*. Thus,
@@ -1038,7 +1038,7 @@ Brian Gallew contributed OpenSSL support for the :mod:`socket` module. OpenSSL
is an implementation of the Secure Socket Layer, which encrypts the data being
sent over a socket. When compiling Python, you can edit :file:`Modules/Setup`
to include SSL support, which adds an additional function to the :mod:`socket`
-module: :func:`socket.ssl(socket, keyfile, certfile)`, which takes a socket
+module: ``socket.ssl(socket, keyfile, certfile)``, which takes a socket
object and returns an SSL socket. The :mod:`httplib` and :mod:`urllib` modules
were also changed to support ``https://`` URLs, though no one has implemented
FTP or SMTP over SSL.
diff --git a/Doc/whatsnew/2.1.rst b/Doc/whatsnew/2.1.rst
index 117af10..b1ab48e 100644
--- a/Doc/whatsnew/2.1.rst
+++ b/Doc/whatsnew/2.1.rst
@@ -204,7 +204,7 @@ Each of these magic methods can return anything at all: a Boolean, a matrix, a
list, or any other Python object. Alternatively they can raise an exception if
the comparison is impossible, inconsistent, or otherwise meaningless.
-The built-in :func:`cmp(A,B)` function can use the rich comparison machinery,
+The built-in ``cmp(A,B)`` function can use the rich comparison machinery,
and now accepts an optional argument specifying which comparison operation to
use; this is given as one of the strings ``"<"``, ``"<="``, ``">"``, ``">="``,
``"=="``, or ``"!="``. If called without the optional third argument,
@@ -350,7 +350,7 @@ where this behaviour is undesirable, object caches being the most common one,
and another being circular references in data structures such as trees.
For example, consider a memoizing function that caches the results of another
-function :func:`f(x)` by storing the function's argument and its result in a
+function ``f(x)`` by storing the function's argument and its result in a
dictionary::
_cache = {}
@@ -656,7 +656,7 @@ New and Improved Modules
use :mod:`ftplib` to retrieve files and then don't work from behind a firewall.
It's deemed unlikely that this will cause problems for anyone, because Netscape
defaults to passive mode and few people complain, but if passive mode is
- unsuitable for your application or network setup, call :meth:`set_pasv(0)` on
+ unsuitable for your application or network setup, call ``set_pasv(0)`` on
FTP objects to disable passive mode.
* Support for raw socket access has been added to the :mod:`socket` module,
@@ -666,7 +666,7 @@ New and Improved Modules
for displaying timing profiles for Python programs, invoked when the module is
run as a script. Contributed by Eric S. Raymond.
-* A new implementation-dependent function, :func:`sys._getframe([depth])`, has
+* A new implementation-dependent function, ``sys._getframe([depth])``, has
been added to return a given frame object from the current call stack.
:func:`sys._getframe` returns the frame at the top of the call stack; if the
optional integer argument *depth* is supplied, the function returns the frame
diff --git a/Doc/whatsnew/2.2.rst b/Doc/whatsnew/2.2.rst
index 1db1ee7..2e7069d 100644
--- a/Doc/whatsnew/2.2.rst
+++ b/Doc/whatsnew/2.2.rst
@@ -173,12 +173,12 @@ attributes of their own:
* :attr:`__doc__` is the attribute's docstring.
-* :meth:`__get__(object)` is a method that retrieves the attribute value from
+* ``__get__(object)`` is a method that retrieves the attribute value from
*object*.
-* :meth:`__set__(object, value)` sets the attribute on *object* to *value*.
+* ``__set__(object, value)`` sets the attribute on *object* to *value*.
-* :meth:`__delete__(object, value)` deletes the *value* attribute of *object*.
+* ``__delete__(object, value)`` deletes the *value* attribute of *object*.
For example, when you write ``obj.x``, the steps that Python actually performs
are::
@@ -288,7 +288,7 @@ Following this rule, referring to :meth:`D.save` will return :meth:`C.save`,
which is the behaviour we're after. This lookup rule is the same as the one
followed by Common Lisp. A new built-in function, :func:`super`, provides a way
to get at a class's superclasses without having to reimplement Python's
-algorithm. The most commonly used form will be :func:`super(class, obj)`, which
+algorithm. The most commonly used form will be ``super(class, obj)``, which
returns a bound superclass object (not the actual class object). This form
will be used in methods to call a method in the superclass; for example,
:class:`D`'s :meth:`save` method would look like this::
@@ -301,7 +301,7 @@ will be used in methods to call a method in the superclass; for example,
...
:func:`super` can also return unbound superclass objects when called as
-:func:`super(class)` or :func:`super(class1, class2)`, but this probably won't
+``super(class)`` or ``super(class1, class2)``, but this probably won't
often be useful.
@@ -314,13 +314,13 @@ code more readable by automatically mapping an attribute access such as
``obj.parent`` into a method call such as ``obj.get_parent``. Python 2.2 adds
some new ways of controlling attribute access.
-First, :meth:`__getattr__(attr_name)` is still supported by new-style classes,
+First, ``__getattr__(attr_name)`` is still supported by new-style classes,
and nothing about it has changed. As before, it will be called when an attempt
is made to access ``obj.foo`` and no attribute named ``foo`` is found in the
instance's dictionary.
New-style classes also support a new method,
-:meth:`__getattribute__(attr_name)`. The difference between the two methods is
+``__getattribute__(attr_name)``. The difference between the two methods is
that :meth:`__getattribute__` is *always* called whenever any attribute is
accessed, while the old :meth:`__getattr__` is only called if ``foo`` isn't
found in the instance's dictionary.
@@ -441,8 +441,8 @@ work, though it really should.
In Python 2.2, iteration can be implemented separately, and :meth:`__getitem__`
methods can be limited to classes that really do support random access. The
-basic idea of iterators is simple. A new built-in function, :func:`iter(obj)`
-or ``iter(C, sentinel)``, is used to get an iterator. :func:`iter(obj)` returns
+basic idea of iterators is simple. A new built-in function, ``iter(obj)``
+or ``iter(C, sentinel)``, is used to get an iterator. ``iter(obj)`` returns
an iterator for the object *obj*, while ``iter(C, sentinel)`` returns an
iterator that will invoke the callable object *C* until it returns *sentinel* to
signal that the iterator is done.
@@ -793,7 +793,7 @@ further details.
Another change is simpler to explain. Since their introduction, Unicode strings
have supported an :meth:`encode` method to convert the string to a selected
-encoding such as UTF-8 or Latin-1. A symmetric :meth:`decode([*encoding*])`
+encoding such as UTF-8 or Latin-1. A symmetric ``decode([*encoding*])``
method has been added to 8-bit strings (though not to Unicode strings) in 2.2.
:meth:`decode` assumes that the string is in the specified encoding and decodes
it, returning whatever is returned by the codec.
@@ -1203,7 +1203,7 @@ Some of the more notable changes are:
to an MBCS encoded string, as used by the Microsoft file APIs. As MBCS is
explicitly used by the file APIs, Python's choice of ASCII as the default
encoding turns out to be an annoyance. On Unix, the locale's character set is
- used if :func:`locale.nl_langinfo(CODESET)` is available. (Windows support was
+ used if ``locale.nl_langinfo(CODESET)`` is available. (Windows support was
contributed by Mark Hammond with assistance from Marc-André Lemburg. Unix
support was added by Martin von Löwis.)
diff --git a/Doc/whatsnew/2.3.rst b/Doc/whatsnew/2.3.rst
index f4c79e4..f0e48d9 100644
--- a/Doc/whatsnew/2.3.rst
+++ b/Doc/whatsnew/2.3.rst
@@ -504,8 +504,8 @@ This produces the following output::
ZeroDivisionError: integer division or modulo by zero
Slightly more advanced programs will use a logger other than the root logger.
-The :func:`getLogger(name)` function is used to get a particular log, creating
-it if it doesn't exist yet. :func:`getLogger(None)` returns the root logger. ::
+The ``getLogger(name)`` function is used to get a particular log, creating
+it if it doesn't exist yet. ``getLogger(None)`` returns the root logger. ::
log = logging.getLogger('server')
...
@@ -724,10 +724,10 @@ module:
objects to it. Additional built-in and frozen modules can be imported by an
object added to this list.
-Importer objects must have a single method, :meth:`find_module(fullname,
-path=None)`. *fullname* will be a module or package name, e.g. ``string`` or
+Importer objects must have a single method, ``find_module(fullname,
+path=None)``. *fullname* will be a module or package name, e.g. ``string`` or
``distutils.core``. :meth:`find_module` must return a loader object that has a
-single method, :meth:`load_module(fullname)`, that creates and returns the
+single method, ``load_module(fullname)``, that creates and returns the
corresponding module object.
Pseudo-code for Python's new import logic, therefore, looks something like this
@@ -935,7 +935,7 @@ Or use slice objects directly in subscripts::
[0, 2, 4]
To simplify implementing sequences that support extended slicing, slice objects
-now have a method :meth:`indices(length)` which, given the length of a sequence,
+now have a method ``indices(length)`` which, given the length of a sequence,
returns a ``(start, stop, step)`` tuple that can be passed directly to
:func:`range`. :meth:`indices` handles omitted and out-of-bounds indices in a
manner consistent with regular slices (and this innocuous phrase hides a welter
@@ -984,7 +984,7 @@ Here are all of the changes that Python 2.3 makes to the core Python language.
* Built-in types now support the extended slicing syntax, as described in
section :ref:`section-slices` of this document.
-* A new built-in function, :func:`sum(iterable, start=0)`, adds up the numeric
+* A new built-in function, ``sum(iterable, start=0)``, adds up the numeric
items in the iterable object and returns their sum. :func:`sum` only accepts
numbers, meaning that you can't use it to concatenate a bunch of strings.
(Contributed by Alex Martelli.)
@@ -998,7 +998,7 @@ Here are all of the changes that Python 2.3 makes to the core Python language.
its index, now takes optional *start* and *stop* arguments to limit the search
to only part of the list.
-* Dictionaries have a new method, :meth:`pop(key[, *default*])`, that returns
+* Dictionaries have a new method, ``pop(key[, *default*])``, that returns
the value corresponding to *key* and removes that key/value pair from the
dictionary. If the requested key isn't present in the dictionary, *default* is
returned if it's specified and :exc:`KeyError` raised if it isn't. ::
@@ -1020,7 +1020,7 @@ Here are all of the changes that Python 2.3 makes to the core Python language.
{}
>>>
- There's also a new class method, :meth:`dict.fromkeys(iterable, value)`, that
+ There's also a new class method, ``dict.fromkeys(iterable, value)``, that
creates a dictionary with keys taken from the supplied iterator *iterable* and
all values set to *value*, defaulting to ``None``.
@@ -1093,7 +1093,7 @@ Here are all of the changes that Python 2.3 makes to the core Python language.
100 bytecodes, speeding up single-threaded applications by reducing the
switching overhead. Some multithreaded applications may suffer slower response
time, but that's easily fixed by setting the limit back to a lower number using
- :func:`sys.setcheckinterval(N)`. The limit can be retrieved with the new
+ ``sys.setcheckinterval(N)``. The limit can be retrieved with the new
:func:`sys.getcheckinterval` function.
* One minor but far-reaching change is that the names of extension types defined
@@ -1272,10 +1272,10 @@ complete list of changes, or look through the CVS logs for all the details.
* Previously the :mod:`doctest` module would only search the docstrings of
public methods and functions for test cases, but it now also examines private
- ones as well. The :func:`DocTestSuite(` function creates a
+ ones as well. The :func:`DocTestSuite` function creates a
:class:`unittest.TestSuite` object from a set of :mod:`doctest` tests.
-* The new :func:`gc.get_referents(object)` function returns a list of all the
+* The new ``gc.get_referents(object)`` function returns a list of all the
objects referenced by *object*.
* The :mod:`getopt` module gained a new function, :func:`gnu_getopt`, that
@@ -1347,8 +1347,8 @@ complete list of changes, or look through the CVS logs for all the details.
documentation for details.
(Contributed by Raymond Hettinger.)
-* Two new functions in the :mod:`math` module, :func:`degrees(rads)` and
- :func:`radians(degs)`, convert between radians and degrees. Other functions in
+* Two new functions in the :mod:`math` module, ``degrees(rads)`` and
+ ``radians(degs)``, convert between radians and degrees. Other functions in
the :mod:`math` module such as :func:`math.sin` and :func:`math.cos` have always
required input values measured in radians. Also, an optional *base* argument
was added to :func:`math.log` to make it easier to compute logarithms for bases
@@ -1405,7 +1405,7 @@ complete list of changes, or look through the CVS logs for all the details.
and therefore faster performance. Setting the parser object's
:attr:`buffer_text` attribute to :const:`True` will enable buffering.
-* The :func:`sample(population, k)` function was added to the :mod:`random`
+* The ``sample(population, k)`` function was added to the :mod:`random`
module. *population* is a sequence or :class:`xrange` object containing the
elements of a population, and :func:`sample` chooses *k* elements from the
population without replacing chosen elements. *k* can be any value up to
@@ -1451,7 +1451,7 @@ complete list of changes, or look through the CVS logs for all the details.
encryption is not believed to be secure. If you need encryption, use one of the
several AES Python modules that are available separately.
-* The :mod:`shutil` module gained a :func:`move(src, dest)` function that
+* The :mod:`shutil` module gained a ``move(src, dest)`` function that
recursively moves a file or directory to a new location.
* Support for more advanced POSIX signal handling was added to the :mod:`signal`
@@ -1459,7 +1459,7 @@ complete list of changes, or look through the CVS logs for all the details.
platforms.
* The :mod:`socket` module now supports timeouts. You can call the
- :meth:`settimeout(t)` method on a socket object to set a timeout of *t* seconds.
+ ``settimeout(t)`` method on a socket object to set a timeout of *t* seconds.
Subsequent socket operations that take longer than *t* seconds to complete will
abort and raise a :exc:`socket.timeout` exception.
@@ -1480,9 +1480,9 @@ complete list of changes, or look through the CVS logs for all the details.
:program:`tar`\ -format archive files. (Contributed by Lars Gustäbel.)
* The new :mod:`textwrap` module contains functions for wrapping strings
- containing paragraphs of text. The :func:`wrap(text, width)` function takes a
+ containing paragraphs of text. The ``wrap(text, width)`` function takes a
string and returns a list containing the text split into lines of no more than
- the chosen width. The :func:`fill(text, width)` function returns a single
+ the chosen width. The ``fill(text, width)`` function returns a single
string, reformatted to fit into lines no longer than the chosen width. (As you
can guess, :func:`fill` is built on top of :func:`wrap`.) For example::
@@ -1903,7 +1903,7 @@ Changes to Python's build process and to the C API include:
short int`, ``I`` for :c:type:`unsigned int`, and ``K`` for :c:type:`unsigned
long long`.
-* A new function, :c:func:`PyObject_DelItemString(mapping, char \*key)` was added
+* A new function, ``PyObject_DelItemString(mapping, char *key)`` was added
as shorthand for ``PyObject_DelItem(mapping, PyString_New(key))``.
* File objects now manage their internal string buffer differently, increasing
diff --git a/Doc/whatsnew/2.4.rst b/Doc/whatsnew/2.4.rst
index 9d339a5..5a28f89 100644
--- a/Doc/whatsnew/2.4.rst
+++ b/Doc/whatsnew/2.4.rst
@@ -37,7 +37,7 @@ PEP 218: Built-In Set Objects
Python 2.3 introduced the :mod:`sets` module. C implementations of set data
types have now been added to the Python core as two new built-in types,
-:func:`set(iterable)` and :func:`frozenset(iterable)`. They provide high speed
+``set(iterable)`` and ``frozenset(iterable)``. They provide high speed
operations for membership testing, for eliminating duplicates from sequences,
and for mathematical operations like unions, intersections, differences, and
symmetric differences. ::
@@ -346,7 +346,7 @@ returned.
PEP 322: Reverse Iteration
==========================
-A new built-in function, :func:`reversed(seq)`, takes a sequence and returns an
+A new built-in function, ``reversed(seq)``, takes a sequence and returns an
iterator that loops over the elements of the sequence in reverse order. ::
>>> for i in reversed(xrange(1,4)):
@@ -384,7 +384,7 @@ PEP 324: New subprocess Module
The standard library provides a number of ways to execute a subprocess, offering
different features and different levels of complexity.
-:func:`os.system(command)` is easy to use, but slow (it runs a shell process
+``os.system(command)`` is easy to use, but slow (it runs a shell process
which executes the command) and dangerous (you have to be careful about escaping
the shell's metacharacters). The :mod:`popen2` module offers classes that can
capture standard output and standard error from the subprocess, but the naming
@@ -431,8 +431,8 @@ The constructor has a number of handy options:
Once you've created the :class:`Popen` instance, you can call its :meth:`wait`
method to pause until the subprocess has exited, :meth:`poll` to check if it's
-exited without pausing, or :meth:`communicate(data)` to send the string *data*
-to the subprocess's standard input. :meth:`communicate(data)` then reads any
+exited without pausing, or ``communicate(data)`` to send the string *data*
+to the subprocess's standard input. ``communicate(data)`` then reads any
data that the subprocess has sent to its standard output or standard error,
returning a tuple ``(stdout_data, stderr_data)``.
@@ -749,10 +749,10 @@ numbers in the current locale.
The solution described in the PEP is to add three new functions to the Python
API that perform ASCII-only conversions, ignoring the locale setting:
-* :c:func:`PyOS_ascii_strtod(str, ptr)` and :c:func:`PyOS_ascii_atof(str, ptr)`
+* ``PyOS_ascii_strtod(str, ptr)`` and ``PyOS_ascii_atof(str, ptr)``
both convert a string to a C :c:type:`double`.
-* :c:func:`PyOS_ascii_formatd(buffer, buf_len, format, d)` converts a
+* ``PyOS_ascii_formatd(buffer, buf_len, format, d)`` converts a
:c:type:`double` to an ASCII string.
The code for these functions came from the GLib library
@@ -778,7 +778,7 @@ Here are all of the changes that Python 2.4 makes to the core Python language.
* Decorators for functions and methods were added (:pep:`318`).
* Built-in :func:`set` and :func:`frozenset` types were added (:pep:`218`).
- Other new built-ins include the :func:`reversed(seq)` function (:pep:`322`).
+ Other new built-ins include the ``reversed(seq)`` function (:pep:`322`).
* Generator expressions were added (:pep:`289`).
@@ -857,7 +857,7 @@ Here are all of the changes that Python 2.4 makes to the core Python language.
(All changes to :meth:`sort` contributed by Raymond Hettinger.)
-* There is a new built-in function :func:`sorted(iterable)` that works like the
+* There is a new built-in function ``sorted(iterable)`` that works like the
in-place :meth:`list.sort` method but can be used in expressions. The
differences are:
@@ -898,8 +898,8 @@ Here are all of the changes that Python 2.4 makes to the core Python language.
For example, you can now run the Python profiler with ``python -m profile``.
(Contributed by Nick Coghlan.)
-* The :func:`eval(expr, globals, locals)` and :func:`execfile(filename, globals,
- locals)` functions and the ``exec`` statement now accept any mapping type
+* The ``eval(expr, globals, locals)`` and ``execfile(filename, globals,
+ locals)`` functions and the ``exec`` statement now accept any mapping type
for the *locals* parameter. Previously this had to be a regular Python
dictionary. (Contributed by Raymond Hettinger.)
@@ -1090,7 +1090,7 @@ complete list of changes, or look through the CVS logs for all the details.
Yves Dionne) and new :meth:`deleteacl` and :meth:`myrights` methods (contributed
by Arnaud Mazin).
-* The :mod:`itertools` module gained a :func:`groupby(iterable[, *func*])`
+* The :mod:`itertools` module gained a ``groupby(iterable[, *func*])``
function. *iterable* is something that can be iterated over to return a stream
of elements, and the optional *func* parameter is a function that takes an
element and returns a key value; if omitted, the key is simply the element
@@ -1139,7 +1139,7 @@ complete list of changes, or look through the CVS logs for all the details.
(Contributed by Hye-Shik Chang.)
-* :mod:`itertools` also gained a function named :func:`tee(iterator, N)` that
+* :mod:`itertools` also gained a function named ``tee(iterator, N)`` that
returns *N* independent iterators that replicate *iterator*. If *N* is omitted,
the default is 2. ::
@@ -1177,7 +1177,7 @@ complete list of changes, or look through the CVS logs for all the details.
level=0, # Log all messages
format='%(levelname)s:%(process)d:%(thread)d:%(message)s')
- Other additions to the :mod:`logging` package include a :meth:`log(level, msg)`
+ Other additions to the :mod:`logging` package include a ``log(level, msg)``
convenience method, as well as a :class:`TimedRotatingFileHandler` class that
rotates its log files at a timed interval. The module already had
:class:`RotatingFileHandler`, which rotated logs once the file exceeded a
@@ -1196,7 +1196,7 @@ complete list of changes, or look through the CVS logs for all the details.
group or for a range of groups. (Contributed by Jürgen A. Erhard.)
* Two new functions were added to the :mod:`operator` module,
- :func:`attrgetter(attr)` and :func:`itemgetter(index)`. Both functions return
+ ``attrgetter(attr)`` and ``itemgetter(index)``. Both functions return
callables that take a single argument and return the corresponding attribute or
item; these callables make excellent data extractors when used with :func:`map`
or :func:`sorted`. For example::
@@ -1223,14 +1223,14 @@ complete list of changes, or look through the CVS logs for all the details.
replacement for :func:`rfc822.formatdate`. You may want to write new e-mail
processing code with this in mind. (Change implemented by Anthony Baxter.)
-* A new :func:`urandom(n)` function was added to the :mod:`os` module, returning
+* A new ``urandom(n)`` function was added to the :mod:`os` module, returning
a string containing *n* bytes of random data. This function provides access to
platform-specific sources of randomness such as :file:`/dev/urandom` on Linux or
the Windows CryptoAPI. (Contributed by Trevor Perrin.)
-* Another new function: :func:`os.path.lexists(path)` returns true if the file
+* Another new function: ``os.path.lexists(path)`` returns true if the file
specified by *path* exists, whether or not it's a symbolic link. This differs
- from the existing :func:`os.path.exists(path)` function, which returns false if
+ from the existing ``os.path.exists(path)`` function, which returns false if
*path* is a symlink that points to a destination that doesn't exist.
(Contributed by Beni Cherniavsky.)
@@ -1243,7 +1243,7 @@ complete list of changes, or look through the CVS logs for all the details.
* The :mod:`profile` module can now profile C extension functions. (Contributed
by Nick Bastin.)
-* The :mod:`random` module has a new method called :meth:`getrandbits(N)` that
+* The :mod:`random` module has a new method called ``getrandbits(N)`` that
returns a long integer *N* bits in length. The existing :meth:`randrange`
method now uses :meth:`getrandbits` where appropriate, making generation of
arbitrarily large random numbers more efficient. (Contributed by Raymond
@@ -1272,7 +1272,7 @@ complete list of changes, or look through the CVS logs for all the details.
this, but 2.4 will raise a :exc:`RuntimeError` exception.
* Two new functions were added to the :mod:`socket` module. :func:`socketpair`
- returns a pair of connected sockets and :func:`getservbyport(port)` looks up the
+ returns a pair of connected sockets and ``getservbyport(port)`` looks up the
service name for a given port number. (Contributed by Dave Cole and Barry
Warsaw.)
@@ -1454,11 +1454,11 @@ Some of the changes to Python's build process and to the C API are:
* Another new macro, :c:macro:`Py_CLEAR(obj)`, decreases the reference count of
*obj* and sets *obj* to the null pointer. (Contributed by Jim Fulton.)
-* A new function, :c:func:`PyTuple_Pack(N, obj1, obj2, ..., objN)`, constructs
+* A new function, ``PyTuple_Pack(N, obj1, obj2, ..., objN)``, constructs
tuples from a variable length argument list of Python objects. (Contributed by
Raymond Hettinger.)
-* A new function, :c:func:`PyDict_Contains(d, k)`, implements fast dictionary
+* A new function, ``PyDict_Contains(d, k)``, implements fast dictionary
lookups without masking exceptions raised during the look-up process.
(Contributed by Raymond Hettinger.)
diff --git a/Doc/whatsnew/2.5.rst b/Doc/whatsnew/2.5.rst
index e059cd5..b91e647 100644
--- a/Doc/whatsnew/2.5.rst
+++ b/Doc/whatsnew/2.5.rst
@@ -171,7 +171,7 @@ method, where the first argument has been provided. ::
popup_menu.append( ("Open", open_func, 1) )
Another function in the :mod:`functools` module is the
-:func:`update_wrapper(wrapper, wrapped)` function that helps you write well-
+``update_wrapper(wrapper, wrapped)`` function that helps you write well-
behaved decorators. :func:`update_wrapper` copies the name, module, and
docstring attribute to a wrapper function so that tracebacks inside the wrapped
function are easier to understand. For example, you might write::
@@ -454,7 +454,7 @@ expression on the right-hand side of an assignment. This means you can write
``val = yield i`` but have to use parentheses when there's an operation, as in
``val = (yield i) + 12``.)
-Values are sent into a generator by calling its :meth:`send(value)` method. The
+Values are sent into a generator by calling its ``send(value)`` method. The
generator's code is then resumed and the :keyword:`yield` expression returns the
specified *value*. If the regular :meth:`next` method is called, the
:keyword:`yield` returns :const:`None`.
@@ -496,7 +496,7 @@ function.
In addition to :meth:`send`, there are two other new methods on generators:
-* :meth:`throw(type, value=None, traceback=None)` is used to raise an exception
+* ``throw(type, value=None, traceback=None)`` is used to raise an exception
inside the generator; the exception is raised by the :keyword:`yield` expression
where the generator's execution is paused.
@@ -660,7 +660,7 @@ A high-level explanation of the context management protocol is:
* The code in *BLOCK* is executed.
-* If *BLOCK* raises an exception, the :meth:`__exit__(type, value, traceback)`
+* If *BLOCK* raises an exception, the ``__exit__(type, value, traceback)``
is called with the exception details, the same values returned by
:func:`sys.exc_info`. The method's return value controls whether the exception
is re-raised: any false value re-raises the exception, and ``True`` will result
@@ -773,7 +773,7 @@ decorator as::
with db_transaction(db) as cursor:
...
-The :mod:`contextlib` module also has a :func:`nested(mgr1, mgr2, ...)` function
+The :mod:`contextlib` module also has a ``nested(mgr1, mgr2, ...)`` function
that combines a number of context managers so you don't need to write nested
':keyword:`with`' statements. In this example, the single ':keyword:`with`'
statement both starts a database transaction and acquires a thread lock::
@@ -782,7 +782,7 @@ statement both starts a database transaction and acquires a thread lock::
with nested (db_transaction(db), lock) as (cursor, locked):
...
-Finally, the :func:`closing(object)` function returns *object* so that it can be
+Finally, the ``closing(object)`` function returns *object* so that it can be
bound to a variable, and calls ``object.close`` at the end of the block. ::
import urllib, sys
@@ -955,7 +955,7 @@ interpreter will check that the type returned is correct, and raises a
A corresponding :attr:`nb_index` slot was added to the C-level
:c:type:`PyNumberMethods` structure to let C extensions implement this protocol.
-:c:func:`PyNumber_Index(obj)` can be used in extension code to call the
+``PyNumber_Index(obj)`` can be used in extension code to call the
:meth:`__index__` function and retrieve its result.
@@ -976,7 +976,7 @@ Here are all of the changes that Python 2.5 makes to the core Python language.
* The :class:`dict` type has a new hook for letting subclasses provide a default
value when a key isn't contained in the dictionary. When a key isn't found, the
- dictionary's :meth:`__missing__(key)` method will be called. This hook is used
+ dictionary's ``__missing__(key)`` method will be called. This hook is used
to implement the new :class:`defaultdict` class in the :mod:`collections`
module. The following example defines a dictionary that returns zero for any
missing key::
@@ -989,16 +989,16 @@ Here are all of the changes that Python 2.5 makes to the core Python language.
print d[1], d[2] # Prints 1, 2
print d[3], d[4] # Prints 0, 0
-* Both 8-bit and Unicode strings have new :meth:`partition(sep)` and
- :meth:`rpartition(sep)` methods that simplify a common use case.
+* Both 8-bit and Unicode strings have new ``partition(sep)`` and
+ ``rpartition(sep)`` methods that simplify a common use case.
- The :meth:`find(S)` method is often used to get an index which is then used to
+ The ``find(S)`` method is often used to get an index which is then used to
slice the string and obtain the pieces that are before and after the separator.
- :meth:`partition(sep)` condenses this pattern into a single method call that
+ ``partition(sep)`` condenses this pattern into a single method call that
returns a 3-tuple containing the substring before the separator, the separator
itself, and the substring after the separator. If the separator isn't found,
the first element of the tuple is the entire string and the other two elements
- are empty. :meth:`rpartition(sep)` also returns a 3-tuple but starts searching
+ are empty. ``rpartition(sep)`` also returns a 3-tuple but starts searching
from the end of the string; the ``r`` stands for 'reverse'.
Some examples::
@@ -1157,7 +1157,7 @@ marked in the following list.
.. Patch 1313939, 1359618
-* The :func:`long(str, base)` function is now faster on long digit strings
+* The ``long(str, base)`` function is now faster on long digit strings
because fewer intermediate results are calculated. The peak is for strings of
around 800--1000 digits where the function is 6 times faster. (Contributed by
Alan McIntyre and committed at the NeedForSpeed sprint.)
@@ -1268,7 +1268,7 @@ complete list of changes, or look through the SVN logs for all the details.
(Contributed by Guido van Rossum.)
* The :class:`deque` double-ended queue type supplied by the :mod:`collections`
- module now has a :meth:`remove(value)` method that removes the first occurrence
+ module now has a ``remove(value)`` method that removes the first occurrence
of *value* in the queue, raising :exc:`ValueError` if the value isn't found.
(Contributed by Raymond Hettinger.)
@@ -1291,7 +1291,7 @@ complete list of changes, or look through the SVN logs for all the details.
* The :mod:`csv` module, which parses files in comma-separated value format,
received several enhancements and a number of bugfixes. You can now set the
maximum size in bytes of a field by calling the
- :meth:`csv.field_size_limit(new_limit)` function; omitting the *new_limit*
+ ``csv.field_size_limit(new_limit)`` function; omitting the *new_limit*
argument will return the currently-set limit. The :class:`reader` class now has
a :attr:`line_num` attribute that counts the number of physical lines read from
the source; records can span multiple physical lines, so :attr:`line_num` is not
@@ -1308,7 +1308,7 @@ complete list of changes, or look through the SVN logs for all the details.
(Contributed by Skip Montanaro and Andrew McNamara.)
* The :class:`datetime` class in the :mod:`datetime` module now has a
- :meth:`strptime(string, format)` method for parsing date strings, contributed
+ ``strptime(string, format)`` method for parsing date strings, contributed
by Josh Spoerri. It uses the same format characters as :func:`time.strptime` and
:func:`time.strftime`::
@@ -1403,7 +1403,7 @@ complete list of changes, or look through the SVN logs for all the details.
* The :mod:`mailbox` module underwent a massive rewrite to add the capability to
modify mailboxes in addition to reading them. A new set of classes that include
:class:`mbox`, :class:`MH`, and :class:`Maildir` are used to read mailboxes, and
- have an :meth:`add(message)` method to add messages, :meth:`remove(key)` to
+ have an ``add(message)`` method to add messages, ``remove(key)`` to
remove messages, and :meth:`lock`/:meth:`unlock` to lock/unlock the mailbox.
The following example converts a maildir-format mailbox into an mbox-format
one::
@@ -1458,7 +1458,7 @@ complete list of changes, or look through the SVN logs for all the details.
:func:`wait4` return additional information. :func:`wait3` doesn't take a
process ID as input, so it waits for any child process to exit and returns a
3-tuple of *process-id*, *exit-status*, *resource-usage* as returned from the
- :func:`resource.getrusage` function. :func:`wait4(pid)` does take a process ID.
+ :func:`resource.getrusage` function. ``wait4(pid)`` does take a process ID.
(Contributed by Chad J. Schroeder.)
On FreeBSD, the :func:`os.stat` function now returns times with nanosecond
@@ -1532,8 +1532,8 @@ complete list of changes, or look through the SVN logs for all the details.
In Python code, netlink addresses are represented as a tuple of 2 integers,
``(pid, group_mask)``.
- Two new methods on socket objects, :meth:`recv_into(buffer)` and
- :meth:`recvfrom_into(buffer)`, store the received data in an object that
+ Two new methods on socket objects, ``recv_into(buffer)`` and
+ ``recvfrom_into(buffer)``, store the received data in an object that
supports the buffer protocol instead of returning the data as a string. This
means you can put the data directly into an array or a memory-mapped file.
@@ -1557,8 +1557,8 @@ complete list of changes, or look through the SVN logs for all the details.
year, number, name = s.unpack(data)
You can also pack and unpack data to and from buffer objects directly using the
- :meth:`pack_into(buffer, offset, v1, v2, ...)` and :meth:`unpack_from(buffer,
- offset)` methods. This lets you store data directly into an array or a memory-
+ ``pack_into(buffer, offset, v1, v2, ...)`` and ``unpack_from(buffer,
+ offset)`` methods. This lets you store data directly into an array or a memory-
mapped file.
(:class:`Struct` objects were implemented by Bob Ippolito at the NeedForSpeed
@@ -1592,7 +1592,7 @@ complete list of changes, or look through the SVN logs for all the details.
.. patch 918101
* The :mod:`threading` module now lets you set the stack size used when new
- threads are created. The :func:`stack_size([*size*])` function returns the
+ threads are created. The ``stack_size([*size*])`` function returns the
currently configured stack size, and supplying the optional *size* parameter
sets a new value. Not all platforms support changing the stack size, but
Windows, POSIX threading, and OS/2 all do. (Contributed by Andrew MacIntyre.)
@@ -1911,7 +1911,7 @@ differently. ::
h = hashlib.new('md5') # Provide algorithm as a string
Once a hash object has been created, its methods are the same as before:
-:meth:`update(string)` hashes the specified string into the current digest
+``update(string)`` hashes the specified string into the current digest
state, :meth:`digest` and :meth:`hexdigest` return the digest value as a binary
string or a string of hex digits, and :meth:`copy` returns a new hashing object
with the same digest state.
@@ -2168,20 +2168,20 @@ Changes to Python's build process and to the C API include:
* Two new macros can be used to indicate C functions that are local to the
current file so that a faster calling convention can be used.
- :c:func:`Py_LOCAL(type)` declares the function as returning a value of the
+ ``Py_LOCAL(type)`` declares the function as returning a value of the
specified *type* and uses a fast-calling qualifier.
- :c:func:`Py_LOCAL_INLINE(type)` does the same thing and also requests the
+ ``Py_LOCAL_INLINE(type)`` does the same thing and also requests the
function be inlined. If :c:macro:`PY_LOCAL_AGGRESSIVE` is defined before
:file:`Python.h` is included, a set of more aggressive optimizations is enabled
for the module; you should benchmark the results to find out if these
optimizations actually make the code faster. (Contributed by Fredrik Lundh at
the NeedForSpeed sprint.)
-* :c:func:`PyErr_NewException(name, base, dict)` can now accept a tuple of base
+* ``PyErr_NewException(name, base, dict)`` can now accept a tuple of base
classes as its *base* argument. (Contributed by Georg Brandl.)
* The :c:func:`PyErr_Warn` function for issuing warnings is now deprecated in
- favour of :c:func:`PyErr_WarnEx(category, message, stacklevel)` which lets you
+ favour of ``PyErr_WarnEx(category, message, stacklevel)`` which lets you
specify the number of stack frames separating this function and the caller. A
*stacklevel* of 1 is the function calling :c:func:`PyErr_WarnEx`, 2 is the
function above that, and so forth. (Added by Neal Norwitz.)
diff --git a/Doc/whatsnew/3.0.rst b/Doc/whatsnew/3.0.rst
index 9f1ec97..7782663 100644
--- a/Doc/whatsnew/3.0.rst
+++ b/Doc/whatsnew/3.0.rst
@@ -812,7 +812,7 @@ Builtins
* The :func:`round` function rounding strategy and return type have
changed. Exact halfway cases are now rounded to the nearest even
result instead of away from zero. (For example, ``round(2.5)`` now
- returns ``2`` rather than ``3``.) :func:`round(x[, n])` now
+ returns ``2`` rather than ``3``.) ``round(x[, n])`` now
delegates to ``x.__round__([n])`` instead of always returning a
float. It generally returns an integer when called with a single
argument and a value of the same type as ``x`` when called with two
diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst
index 0553ec3..99827ff 100644
--- a/Doc/whatsnew/3.2.rst
+++ b/Doc/whatsnew/3.2.rst
@@ -270,7 +270,7 @@ launch of four parallel threads for copying files::
e.submit(shutil.copy, 'src1.txt', 'dest1.txt')
e.submit(shutil.copy, 'src2.txt', 'dest2.txt')
e.submit(shutil.copy, 'src3.txt', 'dest3.txt')
- e.submit(shutil.copy, 'src4.txt', 'dest4.txt')
+ e.submit(shutil.copy, 'src4.txt', 'dest4.txt')
.. seealso::
@@ -2354,7 +2354,7 @@ A number of small performance enhancements have been added:
(Contributed by Antoine Pitrou; :issue:`3001`.)
* The fast-search algorithm in stringlib is now used by the :meth:`split`,
- :meth:`splitlines` and :meth:`replace` methods on
+ :meth:`rsplit`, :meth:`splitlines` and :meth:`replace` methods on
:class:`bytes`, :class:`bytearray` and :class:`str` objects. Likewise, the
algorithm is also used by :meth:`rfind`, :meth:`rindex`, :meth:`rsplit` and
:meth:`rpartition`.
@@ -2471,14 +2471,14 @@ Code Repository
In addition to the existing Subversion code repository at http://svn.python.org
there is now a `Mercurial <http://mercurial.selenic.com/>`_ repository at
-http://hg.python.org/\.
+http://hg.python.org/ .
After the 3.2 release, there are plans to switch to Mercurial as the primary
repository. This distributed version control system should make it easier for
members of the community to create and share external changesets. See
:pep:`385` for details.
-To learn the new version control system, see the `tutorial by Joel
+To learn to use the new version control system, see the `tutorial by Joel
Spolsky <http://hginit.com>`_ or the `Guide to Mercurial Workflows
<http://mercurial.selenic.com/guide/>`_.
diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst
new file mode 100644
index 0000000..7c9ef24
--- /dev/null
+++ b/Doc/whatsnew/3.3.rst
@@ -0,0 +1,2511 @@
+****************************
+ What's New In Python 3.3
+****************************
+
+:Release: |release|
+:Date: |today|
+
+.. Rules for maintenance:
+
+ * Anyone can add text to this document. Do not spend very much time
+ on the wording of your changes, because your text will probably
+ get rewritten to some degree.
+
+ * The maintainer will go through Misc/NEWS periodically and add
+ changes; it's therefore more important to add your changes to
+ Misc/NEWS than to this file.
+
+ * This is not a complete list of every single change; completeness
+ is the purpose of Misc/NEWS. Some changes I consider too small
+ or esoteric to include. If such a change is added to the text,
+ I'll just remove it. (This is another reason you shouldn't spend
+ too much time on writing your addition.)
+
+ * If you want to draw your new text to the attention of the
+ maintainer, add 'XXX' to the beginning of the paragraph or
+ section.
+
+ * It's OK to just add a fragmentary note about a change. For
+ example: "XXX Describe the transmogrify() function added to the
+ socket module." The maintainer will research the change and
+ write the necessary text.
+
+ * You can comment out your additions if you like, but it's not
+ necessary (especially when a final release is some months away).
+
+ * Credit the author of a patch or bugfix. Just the name is
+ sufficient; the e-mail address isn't necessary.
+
+ * It's helpful to add the bug/patch number as a comment:
+
+ XXX Describe the transmogrify() function added to the socket
+ module.
+ (Contributed by P.Y. Developer in :issue:`12345`.)
+
+ This saves the maintainer the effort of going through the Mercurial log
+ when researching a change.
+
+This article explains the new features in Python 3.3, compared to 3.2.
+Python 3.3 was released on September 29, 2012. For full details,
+see the :source:`Misc/NEWS` file.
+
+.. seealso::
+
+ :pep:`398` - Python 3.3 Release Schedule
+
+
+Summary -- Release highlights
+=============================
+
+.. This section singles out the most important changes in Python 3.3.
+ Brevity is key.
+
+New syntax features:
+
+* New ``yield from`` expression for :ref:`generator delegation <pep-380>`.
+* The ``u'unicode'`` syntax is accepted again for :class:`str` objects.
+
+New library modules:
+
+* :mod:`faulthandler` (helps debugging low-level crashes)
+* :mod:`ipaddress` (high-level objects representing IP addresses and masks)
+* :mod:`lzma` (compress data using the XZ / LZMA algorithm)
+* :mod:`unittest.mock` (replace parts of your system under test with mock objects)
+* :mod:`venv` (Python :ref:`virtual environments <pep-405>`, as in the
+ popular ``virtualenv`` package)
+
+New built-in features:
+
+* Reworked :ref:`I/O exception hierarchy <pep-3151>`.
+
+Implementation improvements:
+
+* Rewritten :ref:`import machinery <importlib>` based on :mod:`importlib`.
+* More compact :ref:`unicode strings <pep-393>`.
+* More compact :ref:`attribute dictionaries <pep-412>`.
+
+Significantly Improved Library Modules:
+
+* C Accelerator for the :ref:`decimal <new-decimal>` module.
+* Better unicode handling in the :ref:`email <new-email>` module
+ (:term:`provisional <provisional package>`).
+
+Security improvements:
+
+* Hash randomization is switched on by default.
+
+Please read on for a comprehensive list of user-facing changes.
+
+
+.. _pep-405:
+
+PEP 405: Virtual Environments
+=============================
+
+Virtual environments help create separate Python setups while sharing a
+system-wide base install, for ease of maintenance. Virtual environments
+have their own set of private site packages (i.e. locally-installed
+libraries), and are optionally segregated from the system-wide site
+packages. Their concept and implementation are inspired by the popular
+``virtualenv`` third-party package, but benefit from tighter integration
+with the interpreter core.
+
+This PEP adds the :mod:`venv` module for programmatic access, and the
+:ref:`pyvenv <scripts-pyvenv>` script for command-line access and
+administration. The Python interpreter checks for a ``pyvenv.cfg`` file,
+whose existence signals the base of a virtual environment's directory
+tree.
+
+.. seealso::
+
+ :pep:`405` - Python Virtual Environments
+ PEP written by Carl Meyer; implementation by Carl Meyer and Vinay Sajip
+
+
+PEP 420: Namespace Packages
+===========================
+
+Native support for package directories that don't require ``__init__.py``
+marker files and can automatically span multiple path segments (inspired by
+various third party approaches to namespace packages, as described in
+:pep:`420`).
+
+.. seealso::
+
+ :pep:`420` - Namespace packages
+ PEP written by Eric V. Smith; implementation by Eric V. Smith
+ and Barry Warsaw
+
+
+.. _pep-3118-update:
+
+PEP 3118: New memoryview implementation and buffer protocol documentation
+=========================================================================
+
+The implementation of :pep:`3118` has been significantly improved.
+
+The new memoryview implementation comprehensively fixes all ownership and
+lifetime issues of dynamically allocated fields in the Py_buffer struct
+that led to multiple crash reports. Additionally, several functions that
+crashed or returned incorrect results for non-contiguous or multi-dimensional
+input have been fixed.
+
+The memoryview object now has a PEP-3118 compliant getbufferproc()
+that checks the consumer's request type. Many new features have been
+added; most of them work in full generality for non-contiguous arrays
+and arrays with suboffsets.
+
+The documentation has been updated, clearly spelling out responsibilities
+for both exporters and consumers. Buffer request flags are grouped into
+basic and compound flags. The memory layout of non-contiguous and
+multi-dimensional NumPy-style arrays is explained.
+
+Features
+--------
+
+* All native single character format specifiers in struct module syntax
+ (optionally prefixed with '@') are now supported.
+
+* With some restrictions, the cast() method allows changing of format and
+ shape of C-contiguous arrays.
+
+* Multi-dimensional list representations are supported for any array type.
+
+* Multi-dimensional comparisons are supported for any array type.
+
+* One-dimensional memoryviews of hashable (read-only) types with formats B,
+ b or c are now hashable. (Contributed by Antoine Pitrou in :issue:`13411`)
+
+* Arbitrary slicing of any 1-D array type is supported. For example, it
+ is now possible to reverse a memoryview in O(1) by using a negative step.
+
+API changes
+-----------
+
+* The maximum number of dimensions is officially limited to 64.
+
+* The representation of empty shape, strides and suboffsets is now
+ an empty tuple instead of None.
+
+* Accessing a memoryview element with format 'B' (unsigned bytes)
+ now returns an integer (in accordance with the struct module syntax).
+ For returning a bytes object the view must be cast to 'c' first.
+
+* memoryview comparisons now use the logical structure of the operands
+ and compare all array elements by value. All format strings in struct
+ module syntax are supported. Views with unrecognised format strings
+ are still permitted, but will always compare as unequal, regardless
+ of view contents.
+
+* For further changes see `Build and C API Changes`_ and `Porting C code`_ .
+
+(Contributed by Stefan Krah in :issue:`10181`)
+
+.. seealso::
+
+ :pep:`3118` - Revising the Buffer Protocol
+
+
+.. _pep-393:
+
+PEP 393: Flexible String Representation
+=======================================
+
+The Unicode string type is changed to support multiple internal
+representations, depending on the character with the largest Unicode ordinal
+(1, 2, or 4 bytes) in the represented string. This allows a space-efficient
+representation in common cases, but gives access to full UCS-4 on all
+systems. For compatibility with existing APIs, several representations may
+exist in parallel; over time, this compatibility should be phased out.
+
+On the Python side, there should be no downside to this change.
+
+On the C API side, PEP 393 is fully backward compatible. The legacy API
+should remain available at least five years. Applications using the legacy
+API will not fully benefit from the memory reduction, or - worse - may use
+a bit more memory, because Python may have to maintain two versions of each
+string (in the legacy format and in the new efficient storage).
+
+Functionality
+-------------
+
+Changes introduced by :pep:`393` are the following:
+
+* Python now always supports the full range of Unicode codepoints, including
+ non-BMP ones (i.e. from ``U+0000`` to ``U+10FFFF``). The distinction between
+ narrow and wide builds no longer exists and Python now behaves like a wide
+ build, even under Windows.
+
+* With the death of narrow builds, the problems specific to narrow builds have
+ also been fixed, for example:
+
+ * :func:`len` now always returns 1 for non-BMP characters,
+ so ``len('\U0010FFFF') == 1``;
+
+ * surrogate pairs are not recombined in string literals,
+ so ``'\uDBFF\uDFFF' != '\U0010FFFF'``;
+
+ * indexing or slicing non-BMP characters returns the expected value,
+ so ``'\U0010FFFF'[0]`` now returns ``'\U0010FFFF'`` and not ``'\uDBFF'``;
+
+ * all other functions in the standard library now correctly handle
+ non-BMP codepoints.
+
+* The value of :data:`sys.maxunicode` is now always ``1114111`` (``0x10FFFF``
+ in hexadecimal). The :c:func:`PyUnicode_GetMax` function still returns
+ either ``0xFFFF`` or ``0x10FFFF`` for backward compatibility, and it should
+ not be used with the new Unicode API (see :issue:`13054`).
+
+* The :file:`./configure` flag ``--with-wide-unicode`` has been removed.
+
+Performance and resource usage
+------------------------------
+
+The storage of Unicode strings now depends on the highest codepoint in the string:
+
+* pure ASCII and Latin1 strings (``U+0000-U+00FF``) use 1 byte per codepoint;
+
+* BMP strings (``U+0000-U+FFFF``) use 2 bytes per codepoint;
+
+* non-BMP strings (``U+10000-U+10FFFF``) use 4 bytes per codepoint.
+
+The net effect is that for most applications, memory usage of string
+storage should decrease significantly - especially compared to former
+wide unicode builds - as, in many cases, strings will be pure ASCII
+even in international contexts (because many strings store non-human
+language data, such as XML fragments, HTTP headers, JSON-encoded data,
+etc.). We also hope that it will, for the same reasons, increase CPU
+cache efficiency on non-trivial applications. The memory usage of
+Python 3.3 is two to three times smaller than Python 3.2, and a little
+bit better than Python 2.7, on a Django benchmark (see the PEP for
+details).
+
+.. seealso::
+
+ :pep:`393` - Flexible String Representation
+ PEP written by Martin von Löwis; implementation by Torsten Becker
+ and Martin von Löwis.
+
+
+.. _pep-397:
+
+PEP 397: Python Launcher for Windows
+====================================
+
+The Python 3.3 Windows installer now includes a ``py`` launcher application
+that can be used to launch Python applications in a version independent
+fashion.
+
+This launcher is invoked implicitly when double-clicking ``*.py`` files.
+If only a single Python version is installed on the system, that version
+will be used to run the file. If multiple versions are installed, the most
+recent version is used by default, but this can be overridden by including
+a Unix-style "shebang line" in the Python script.
+
+The launcher can also be used explicitly from the command line as the ``py``
+application. Running ``py`` follows the same version selection rules as
+implicitly launching scripts, but a more specific version can be selected
+by passing appropriate arguments (such as ``-3`` to request Python 3 when
+Python 2 is also installed, or ``-2.6`` to specifically request an earlier
+Python version when a more recent version is installed).
+
+In addition to the launcher, the Windows installer now includes an
+option to add the newly installed Python to the system PATH (contributed
+by Brian Curtin in :issue:`3561`).
+
+.. seealso::
+
+ :pep:`397` - Python Launcher for Windows
+ PEP written by Mark Hammond and Martin v. Löwis; implementation by
+ Vinay Sajip.
+
+ Launcher documentation: :ref:`launcher`
+
+ Installer PATH modification: :ref:`windows-path-mod`
+
+
+.. _pep-3151:
+
+PEP 3151: Reworking the OS and IO exception hierarchy
+=====================================================
+
+The hierarchy of exceptions raised by operating system errors is now both
+simplified and finer-grained.
+
+You don't have to worry anymore about choosing the appropriate exception
+type between :exc:`OSError`, :exc:`IOError`, :exc:`EnvironmentError`,
+:exc:`WindowsError`, :exc:`mmap.error`, :exc:`socket.error` or
+:exc:`select.error`. All these exception types are now only one:
+:exc:`OSError`. The other names are kept as aliases for compatibility
+reasons.
+
+Also, it is now easier to catch a specific error condition. Instead of
+inspecting the ``errno`` attribute (or ``args[0]``) for a particular
+constant from the :mod:`errno` module, you can catch the adequate
+:exc:`OSError` subclass. The available subclasses are the following:
+
+* :exc:`BlockingIOError`
+* :exc:`ChildProcessError`
+* :exc:`ConnectionError`
+* :exc:`FileExistsError`
+* :exc:`FileNotFoundError`
+* :exc:`InterruptedError`
+* :exc:`IsADirectoryError`
+* :exc:`NotADirectoryError`
+* :exc:`PermissionError`
+* :exc:`ProcessLookupError`
+* :exc:`TimeoutError`
+
+And the :exc:`ConnectionError` itself has finer-grained subclasses:
+
+* :exc:`BrokenPipeError`
+* :exc:`ConnectionAbortedError`
+* :exc:`ConnectionRefusedError`
+* :exc:`ConnectionResetError`
+
+Thanks to the new exceptions, common usages of the :mod:`errno` module can now
+be avoided. For example, the following code written for Python 3.2::
+
+ from errno import ENOENT, EACCES, EPERM
+
+ try:
+ with open("document.txt") as f:
+ content = f.read()
+ except IOError as err:
+ if err.errno == ENOENT:
+ print("document.txt file is missing")
+ elif err.errno in (EACCES, EPERM):
+ print("You are not allowed to read document.txt")
+ else:
+ raise
+
+can now be written without the :mod:`errno` import and without manual
+inspection of exception attributes::
+
+ try:
+ with open("document.txt") as f:
+ content = f.read()
+ except FileNotFoundError:
+ print("document.txt file is missing")
+ except PermissionError:
+ print("You are not allowed to read document.txt")
+
+.. seealso::
+
+ :pep:`3151` - Reworking the OS and IO Exception Hierarchy
+ PEP written and implemented by Antoine Pitrou
+
+
+.. _pep-380:
+
+PEP 380: Syntax for Delegating to a Subgenerator
+================================================
+
+PEP 380 adds the ``yield from`` expression, allowing a generator to delegate
+part of its operations to another generator. This allows a section of code
+containing 'yield' to be factored out and placed in another generator.
+Additionally, the subgenerator is allowed to return with a value, and the
+value is made available to the delegating generator.
+
+While designed primarily for use in delegating to a subgenerator, the ``yield
+from`` expression actually allows delegation to arbitrary subiterators.
+
+For simple iterators, ``yield from iterable`` is essentially just a shortened
+form of ``for item in iterable: yield item``::
+
+ >>> def g(x):
+ ... yield from range(x, 0, -1)
+ ... yield from range(x)
+ ...
+ >>> list(g(5))
+ [5, 4, 3, 2, 1, 0, 1, 2, 3, 4]
+
+However, unlike an ordinary loop, ``yield from`` allows subgenerators to
+receive sent and thrown values directly from the calling scope, and
+return a final value to the outer generator::
+
+ >>> def accumulate(start=0):
+ ... tally = start
+ ... while 1:
+ ... next = yield
+ ... if next is None:
+ ... return tally
+ ... tally += next
+ ...
+ >>> def gather_tallies(tallies, start=0):
+ ... while 1:
+ ... tally = yield from accumulate()
+ ... tallies.append(tally)
+ ...
+ >>> tallies = []
+ >>> acc = gather_tallies(tallies)
+ >>> next(acc) # Ensure the accumulator is ready to accept values
+ >>> for i in range(10):
+ ... acc.send(i)
+ ...
+ >>> acc.send(None) # Finish the first tally
+ >>> for i in range(5):
+ ... acc.send(i)
+ ...
+ >>> acc.send(None) # Finish the second tally
+ >>> tallies
+ [45, 10]
+
+The main principle driving this change is to allow even generators that are
+designed to be used with the ``send`` and ``throw`` methods to be split into
+multiple subgenerators as easily as a single large function can be split into
+multiple subfunctions.
+
+.. seealso::
+
+ :pep:`380` - Syntax for Delegating to a Subgenerator
+ PEP written by Greg Ewing; implementation by Greg Ewing, integrated into
+ 3.3 by Renaud Blanch, Ryan Kelly and Nick Coghlan; documentation by
+ Zbigniew Jędrzejewski-Szmek and Nick Coghlan
+
+
+PEP 409: Suppressing exception context
+======================================
+
+PEP 409 introduces new syntax that allows the display of the chained
+exception context to be disabled. This allows cleaner error messages in
+applications that convert between exception types::
+
+ >>> class D:
+ ... def __init__(self, extra):
+ ... self._extra_attributes = extra
+ ... def __getattr__(self, attr):
+ ... try:
+ ... return self._extra_attributes[attr]
+ ... except KeyError:
+ ... raise AttributeError(attr) from None
+ ...
+ >>> D({}).x
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "<stdin>", line 8, in __getattr__
+ AttributeError: x
+
+Without the ``from None`` suffix to suppress the cause, the original
+exception would be displayed by default::
+
+ >>> class C:
+ ... def __init__(self, extra):
+ ... self._extra_attributes = extra
+ ... def __getattr__(self, attr):
+ ... try:
+ ... return self._extra_attributes[attr]
+ ... except KeyError:
+ ... raise AttributeError(attr)
+ ...
+ >>> C({}).x
+ Traceback (most recent call last):
+ File "<stdin>", line 6, in __getattr__
+ KeyError: 'x'
+
+ During handling of the above exception, another exception occurred:
+
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "<stdin>", line 8, in __getattr__
+ AttributeError: x
+
+No debugging capability is lost, as the original exception context remains
+available if needed (for example, if an intervening library has incorrectly
+suppressed valuable underlying details)::
+
+ >>> try:
+ ... D({}).x
+ ... except AttributeError as exc:
+ ... print(repr(exc.__context__))
+ ...
+ KeyError('x',)
+
+.. seealso::
+
+ :pep:`409` - Suppressing exception context
+ PEP written by Ethan Furman; implemented by Ethan Furman and Nick
+ Coghlan.
+
+
+PEP 414: Explicit Unicode literals
+======================================
+
+To ease the transition from Python 2 for Unicode aware Python applications
+that make heavy use of Unicode literals, Python 3.3 once again supports the
+"``u``" prefix for string literals. This prefix has no semantic significance
+in Python 3; it is provided solely to reduce the number of purely mechanical
+changes in migrating to Python 3, making it easier for developers to focus on
+the more significant semantic changes (such as the stricter default
+separation of binary and text data).
+
+.. seealso::
+
+ :pep:`414` - Explicit Unicode literals
+ PEP written by Armin Ronacher.
+
+
+PEP 3155: Qualified name for classes and functions
+==================================================
+
+Functions and class objects have a new ``__qualname__`` attribute representing
+the "path" from the module top-level to their definition. For global functions
+and classes, this is the same as ``__name__``. For other functions and classes,
+it provides better information about where they were actually defined, and
+how they might be accessible from the global scope.
+
+Example with (non-bound) methods::
+
+ >>> class C:
+ ... def meth(self):
+ ... pass
+ >>> C.meth.__name__
+ 'meth'
+ >>> C.meth.__qualname__
+ 'C.meth'
+
+Example with nested classes::
+
+ >>> class C:
+ ... class D:
+ ... def meth(self):
+ ... pass
+ ...
+ >>> C.D.__name__
+ 'D'
+ >>> C.D.__qualname__
+ 'C.D'
+ >>> C.D.meth.__name__
+ 'meth'
+ >>> C.D.meth.__qualname__
+ 'C.D.meth'
+
+Example with nested functions::
+
+ >>> def outer():
+ ... def inner():
+ ... pass
+ ... return inner
+ ...
+ >>> outer().__name__
+ 'inner'
+ >>> outer().__qualname__
+ 'outer.<locals>.inner'
+
+The string representation of those objects is also changed to include the
+new, more precise information::
+
+ >>> str(C.D)
+ "<class '__main__.C.D'>"
+ >>> str(C.D.meth)
+ '<function C.D.meth at 0x7f46b9fe31e0>'
+
+.. seealso::
+
+ :pep:`3155` - Qualified name for classes and functions
+ PEP written and implemented by Antoine Pitrou.
+
+
+.. _pep-412:
+
+PEP 412: Key-Sharing Dictionary
+===============================
+
+Dictionaries used for the storage of objects' attributes are now able to
+share part of their internal storage between each other (namely, the part
+which stores the keys and their respective hashes). This reduces the memory
+consumption of programs creating many instances of non-builtin types.
+
+.. seealso::
+
+ :pep:`412` - Key-Sharing Dictionary
+ PEP written and implemented by Mark Shannon.
+
+
+PEP 362: Function Signature Object
+==================================
+
+A new function :func:`inspect.signature` makes introspection of Python
+callables easy and straightforward. A broad range of callables is supported:
+Python functions, decorated or not, classes, and :func:`functools.partial`
+objects. New classes :class:`inspect.Signature`, :class:`inspect.Parameter`
+and :class:`inspect.BoundArguments` hold information about the call signatures,
+such as annotations, default values, parameter kinds, and bound arguments,
+which considerably simplifies writing decorators and any code that validates
+or amends calling signatures or arguments.
+
+.. seealso::
+
+ :pep:`362` - Function Signature Object
+ PEP written by Brett Cannon, Yury Selivanov, Larry Hastings, Jiwon Seo;
+ implemented by Yury Selivanov.
+
+
+PEP 421: Adding sys.implementation
+==================================
+
+A new attribute on the :mod:`sys` module exposes details specific to the
+implementation of the currently running interpreter. The initial set of
+attributes on :attr:`sys.implementation` are ``name``, ``version``,
+``hexversion``, and ``cache_tag``.
+
+The intention of ``sys.implementation`` is to consolidate into one namespace
+the implementation-specific data used by the standard library. This allows
+different Python implementations to share a single standard library code base
+much more easily. In its initial state, ``sys.implementation`` holds only a
+small portion of the implementation-specific data. Over time that ratio will
+shift in order to make the standard library more portable.
+
+One example of improved standard library portability is ``cache_tag``. As of
+Python 3.3, ``sys.implementation.cache_tag`` is used by :mod:`importlib` to
+support :pep:`3147` compliance. Any Python implementation that uses
+``importlib`` for its built-in import system may use ``cache_tag`` to control
+the caching behavior for modules.
+
+SimpleNamespace
+---------------
+
+The implementation of ``sys.implementation`` also introduces a new type to
+Python: :class:`types.SimpleNamespace`. In contrast to a mapping-based
+namespace, like :class:`dict`, ``SimpleNamespace`` is attribute-based, like
+:class:`object`. However, unlike ``object``, ``SimpleNamespace`` instances
+are writable. This means that you can add, remove, and modify the namespace
+through normal attribute access.
+
+.. seealso::
+
+ :pep:`421` - Adding sys.implementation
+ PEP written and implemented by Eric Snow.
+
+
+.. _importlib:
+
+Using importlib as the Implementation of Import
+===============================================
+:issue:`2377` - Replace __import__ w/ importlib.__import__
+:issue:`13959` - Re-implement parts of :mod:`imp` in pure Python
+:issue:`14605` - Make import machinery explicit
+:issue:`14646` - Require loaders set __loader__ and __package__
+
+The :func:`__import__` function is now powered by :func:`importlib.__import__`.
+This work leads to the completion of "phase 2" of :pep:`302`. There are
+multiple benefits to this change. First, it has allowed for more of the
+machinery powering import to be exposed instead of being implicit and hidden
+within the C code. It also provides a single implementation for all Python VMs
+supporting Python 3.3 to use, helping to end any VM-specific deviations in
+import semantics. And finally it eases the maintenance of import, allowing for
+future growth to occur.
+
+For the common user, there should be no visible change in semantics. For
+those whose code currently manipulates import or calls import
+programmatically, the code changes that might possibly be required are covered
+in the `Porting Python code`_ section of this document.
+
+New APIs
+--------
+One of the large benefits of this work is the exposure of what goes into
+making the import statement work. That means the various importers that were
+once implicit are now fully exposed as part of the :mod:`importlib` package.
+
+The abstract base classes defined in :mod:`importlib.abc` have been expanded
+to properly delineate between :term:`meta path finders <meta path finder>`
+and :term:`path entry finders <path entry finder>` by introducing
+:class:`importlib.abc.MetaPathFinder` and
+:class:`importlib.abc.PathEntryFinder`, respectively. The old ABC of
+:class:`importlib.abc.Finder` is now only provided for backwards-compatibility
+and does not enforce any method requirements.
+
+In terms of finders, :class:`importlib.machinery.FileFinder` exposes the
+mechanism used to search for source and bytecode files of a module. Previously
+this class was an implicit member of :attr:`sys.path_hooks`.
+
+For loaders, the new abstract base class :class:`importlib.abc.FileLoader` helps
+write a loader that uses the file system as the storage mechanism for a module's
+code. The loader for source files
+(:class:`importlib.machinery.SourceFileLoader`), sourceless bytecode files
+(:class:`importlib.machinery.SourcelessFileLoader`), and extension modules
+(:class:`importlib.machinery.ExtensionFileLoader`) are now available for
+direct use.
+
+:exc:`ImportError` now has ``name`` and ``path`` attributes which are set when
+there is relevant data to provide. The message for failed imports will also
+provide the full name of the module now instead of just the tail end of the
+module's name.
+
+The :func:`importlib.invalidate_caches` function will now call the method with
+the same name on all finders cached in :attr:`sys.path_importer_cache` to help
+clean up any stored state as necessary.
+
+Visible Changes
+---------------
+
+For potential required changes to code, see the `Porting Python code`_
+section.
+
+Beyond the expanse of what :mod:`importlib` now exposes, there are other
+visible changes to import. The biggest is that :attr:`sys.meta_path` and
+:attr:`sys.path_hooks` now store all of the meta path finders and path entry
+hooks used by import. Previously the finders were implicit and hidden within
+the C code of import instead of being directly exposed. This means that one can
+now easily remove or change the order of the various finders to fit one's needs.
+
+Another change is that all modules have a ``__loader__`` attribute, storing the
+loader used to create the module. :pep:`302` has been updated to make this
+attribute mandatory for loaders to implement, so in the future once 3rd-party
+loaders have been updated people will be able to rely on the existence of the
+attribute. Until such time, though, import is setting the attribute post-load.
+
+Loaders are also now expected to set the ``__package__`` attribute from
+:pep:`366`. Once again, import itself is already setting this on all loaders
+from :mod:`importlib` and import itself is setting the attribute post-load.
+
+``None`` is now inserted into :attr:`sys.path_importer_cache` when no finder
+can be found on :attr:`sys.path_hooks`. Since :class:`imp.NullImporter` is not
+directly exposed on :attr:`sys.path_hooks` it could no longer be relied upon to
+always be available to use as a value representing no finder found.
+
+All other changes relate to semantic changes which should be taken into
+consideration when updating code for Python 3.3, and thus should be read about
+in the `Porting Python code`_ section of this document.
+
+(Implementation by Brett Cannon)
+
+
+Other Language Changes
+======================
+
+Some smaller changes made to the core Python language are:
+
+* Added support for Unicode name aliases and named sequences.
+ Both :func:`unicodedata.lookup()` and ``'\N{...}'`` now resolve name aliases,
+ and :func:`unicodedata.lookup()` resolves named sequences too.
+
+ (Contributed by Ezio Melotti in :issue:`12753`)
+
+* Unicode database updated to UCD version 6.1.0
+
+* Equality comparisons on :func:`range` objects now return a result reflecting
+ the equality of the underlying sequences generated by those range objects.
+ (:issue:`13201`)
+
+* The ``count()``, ``find()``, ``rfind()``, ``index()`` and ``rindex()``
+ methods of :class:`bytes` and :class:`bytearray` objects now accept an
+ integer between 0 and 255 as their first argument.
+
+ (Contributed by Petri Lehtinen in :issue:`12170`)
+
+* The ``rjust()``, ``ljust()``, and ``center()`` methods of :class:`bytes`
+ and :class:`bytearray` now accept a :class:`bytearray` for the ``fill``
+ argument. (Contributed by Petri Lehtinen in :issue:`12380`.)
+
+* New methods have been added to :class:`list` and :class:`bytearray`:
+ ``copy()`` and ``clear()`` (:issue:`10516`). Consequently,
+ :class:`~collections.abc.MutableSequence` now also defines a
+ :meth:`~collections.abc.MutableSequence.clear` method (:issue:`11388`).
+
+* Raw bytes literals can now be written ``rb"..."`` as well as ``br"..."``.
+
+ (Contributed by Antoine Pitrou in :issue:`13748`.)
+
+* :meth:`dict.setdefault` now does only one lookup for the given key, making
+ it atomic when used with built-in types.
+
+ (Contributed by Filip Gruszczyński in :issue:`13521`.)
+
+* The error messages produced when a function call does not match the function
+ signature have been significantly improved.
+
+ (Contributed by Benjamin Peterson.)
+
+
+A Finer-Grained Import Lock
+===========================
+
+Previous versions of CPython have always relied on a global import lock.
+This led to unexpected annoyances, such as deadlocks when importing a module
+would trigger code execution in a different thread as a side-effect.
+Clumsy workarounds were sometimes employed, such as the
+:c:func:`PyImport_ImportModuleNoBlock` C API function.
+
+In Python 3.3, importing a module takes a per-module lock. This correctly
+serializes importation of a given module from multiple threads (preventing
+the exposure of incompletely initialized modules), while eliminating the
+aforementioned annoyances.
+
+(Contributed by Antoine Pitrou in :issue:`9260`.)
+
+
+Builtin functions and types
+===========================
+
+* :func:`open` gets a new *opener* parameter: the underlying file descriptor
+ for the file object is then obtained by calling *opener* with (*file*,
+ *flags*). It can be used to pass custom flags like :data:`os.O_CLOEXEC` for
+ example. The ``'x'`` mode was added: open for exclusive creation, failing if
+ the file already exists.
+* :func:`print`: added the *flush* keyword argument. If the *flush* keyword
+ argument is true, the stream is forcibly flushed.
+* :func:`hash`: hash randomization is enabled by default, see
+ :meth:`object.__hash__` and :envvar:`PYTHONHASHSEED`.
+* The :class:`str` type gets a new :meth:`~str.casefold` method: return a
+ casefolded copy of the string; casefolded strings may be used for caseless
+ matching. For example, ``'ß'.casefold()`` returns ``'ss'``.
+* The sequence documentation has been substantially rewritten to better
+ explain the binary/text sequence distinction and to provide specific
+ documentation sections for the individual builtin sequence types
+ (:issue:`4966`)
+
+
+New Modules
+===========
+
+faulthandler
+------------
+
+This new debug module :mod:`faulthandler` contains functions to dump Python tracebacks explicitly,
+on a fault (a crash like a segmentation fault), after a timeout, or on a user
+signal. Call :func:`faulthandler.enable` to install fault handlers for the
+:const:`SIGSEGV`, :const:`SIGFPE`, :const:`SIGABRT`, :const:`SIGBUS`, and
+:const:`SIGILL` signals. You can also enable them at startup by setting the
+:envvar:`PYTHONFAULTHANDLER` environment variable or by using the :option:`-X`
+``faulthandler`` command line option.
+
+Example of a segmentation fault on Linux: ::
+
+ $ python -q -X faulthandler
+ >>> import ctypes
+ >>> ctypes.string_at(0)
+ Fatal Python error: Segmentation fault
+
+ Current thread 0x00007fb899f39700:
+ File "/home/python/cpython/Lib/ctypes/__init__.py", line 486 in string_at
+ File "<stdin>", line 1 in <module>
+ Segmentation fault
+
+
+ipaddress
+---------
+
+The new :mod:`ipaddress` module provides tools for creating and manipulating
+objects representing IPv4 and IPv6 addresses, networks and interfaces (i.e.
+an IP address associated with a specific IP subnet).
+
+(Contributed by Google and Peter Moody in :pep:`3144`)
+
+lzma
+----
+
+The newly-added :mod:`lzma` module provides data compression and decompression
+using the LZMA algorithm, including support for the ``.xz`` and ``.lzma``
+file formats.
+
+(Contributed by Nadeem Vawda and Per Øyvind Karlsen in :issue:`6715`)
+
+
+Improved Modules
+================
+
+abc
+---
+
+Improved support for abstract base classes containing descriptors composed with
+abstract methods. The recommended approach to declaring abstract descriptors is
+now to provide :attr:`__isabstractmethod__` as a dynamically updated
+property. The built-in descriptors have been updated accordingly.
+
+ * :class:`abc.abstractproperty` has been deprecated, use :class:`property`
+ with :func:`abc.abstractmethod` instead.
+ * :class:`abc.abstractclassmethod` has been deprecated, use
+ :class:`classmethod` with :func:`abc.abstractmethod` instead.
+ * :class:`abc.abstractstaticmethod` has been deprecated, use
+ :class:`staticmethod` with :func:`abc.abstractmethod` instead.
+
+(Contributed by Darren Dale in :issue:`11610`)
+
+:meth:`abc.ABCMeta.register` now returns the registered subclass, which means
+it can now be used as a class decorator (:issue:`10868`).
+
+
+array
+-----
+
+The :mod:`array` module supports the :c:type:`long long` type using ``q`` and
+``Q`` type codes.
+
+(Contributed by Oren Tirosh and Hirokazu Yamamoto in :issue:`1172711`)
+
+
+base64
+------
+
+ASCII-only Unicode strings are now accepted by the decoding functions of the
+:mod:`base64` modern interface. For example, ``base64.b64decode('YWJj')``
+returns ``b'abc'``. (Contributed by Catalin Iacob in :issue:`13641`.)
+
+
+binascii
+--------
+
+In addition to the binary objects they normally accept, the ``a2b_`` functions
+now all also accept ASCII-only strings as input. (Contributed by Antoine
+Pitrou in :issue:`13637`.)
+
+
+bz2
+---
+
+The :mod:`bz2` module has been rewritten from scratch. In the process, several
+new features have been added:
+
+* New :func:`bz2.open` function: open a bzip2-compressed file in binary or
+ text mode.
+
+* :class:`bz2.BZ2File` can now read from and write to arbitrary file-like
+ objects, by means of its constructor's *fileobj* argument.
+
+ (Contributed by Nadeem Vawda in :issue:`5863`)
+
+* :class:`bz2.BZ2File` and :func:`bz2.decompress` can now decompress
+ multi-stream inputs (such as those produced by the :program:`pbzip2` tool).
+ :class:`bz2.BZ2File` can now also be used to create this type of file, using
+ the ``'a'`` (append) mode.
+
+ (Contributed by Nir Aides in :issue:`1625`)
+
+* :class:`bz2.BZ2File` now implements all of the :class:`io.BufferedIOBase` API,
+ except for the :meth:`detach` and :meth:`truncate` methods.
+
+
+codecs
+------
+
+The :mod:`~encodings.mbcs` codec has been rewritten to handle correctly
+``replace`` and ``ignore`` error handlers on all Windows versions. The
+:mod:`~encodings.mbcs` codec now supports all error handlers, instead of only
+``replace`` to encode and ``ignore`` to decode.
+
+A new Windows-only codec has been added: ``cp65001`` (:issue:`13216`). It is the
+Windows code page 65001 (Windows UTF-8, ``CP_UTF8``). For example, it is used
+by ``sys.stdout`` if the console output code page is set to cp65001 (e.g., using
+``chcp 65001`` command).
+
+Multibyte CJK decoders now resynchronize faster. They only ignore the first
+byte of an invalid byte sequence. For example, ``b'\xff\n'.decode('gb2312',
+'replace')`` now returns a ``\n`` after the replacement character.
+
+(:issue:`12016`)
+
+Incremental CJK codec encoders are no longer reset at each call to their
+encode() methods. For example::
+
+ $ ./python -q
+ >>> import codecs
+ >>> encoder = codecs.getincrementalencoder('hz')('strict')
+ >>> b''.join(encoder.encode(x) for x in '\u52ff\u65bd\u65bc\u4eba\u3002 Bye.')
+ b'~{NpJ)l6HK!#~} Bye.'
+
+This example gives ``b'~{Np~}~{J)~}~{l6~}~{HK~}~{!#~} Bye.'`` with older Python
+versions.
+
+(:issue:`12100`)
+
+The ``unicode_internal`` codec has been deprecated.
+
+
+collections
+-----------
+
+Addition of a new :class:`~collections.ChainMap` class to allow treating a
+number of mappings as a single unit. (Written by Raymond Hettinger for
+:issue:`11089`, made public in :issue:`11297`)
+
+The abstract base classes have been moved to a new :mod:`collections.abc`
+module, to better differentiate between the abstract and the concrete
+collections classes. Aliases for ABCs are still present in the
+:mod:`collections` module to preserve existing imports. (:issue:`11085`)
+
+.. XXX addition of __slots__ to ABCs not recorded here: internal detail
+
+The :class:`~collections.Counter` class now supports the unary ``+`` and ``-``
+operators, as well as the in-place operators ``+=``, ``-=``, ``|=``, and
+``&=``. (Contributed by Raymond Hettinger in :issue:`13121`.)
+
+
+contextlib
+----------
+
+:class:`~contextlib.ExitStack` now provides a solid foundation for
+programmatic manipulation of context managers and similar cleanup
+functionality. Unlike the previous ``contextlib.nested`` API (which was
+deprecated and removed), the new API is designed to work correctly
+regardless of whether context managers acquire their resources in
+their ``__init__`` method (for example, file objects) or in their
+``__enter__`` method (for example, synchronisation objects from the
+:mod:`threading` module).
+
+(:issue:`13585`)
+
+
+crypt
+-----
+
+Addition of salt and modular crypt format (hashing method) and the :func:`~crypt.mksalt`
+function to the :mod:`crypt` module.
+
+(:issue:`10924`)
+
+curses
+------
+
+ * If the :mod:`curses` module is linked to the ncursesw library, use Unicode
+ functions when Unicode strings or characters are passed (e.g.
+ :c:func:`waddwstr`), and bytes functions otherwise (e.g. :c:func:`waddstr`).
+ * Use the locale encoding instead of ``utf-8`` to encode Unicode strings.
+ * :class:`curses.window` has a new :attr:`curses.window.encoding` attribute.
+ * The :class:`curses.window` class has a new :meth:`~curses.window.get_wch`
+ method to get a wide character
+ * The :mod:`curses` module has a new :func:`~curses.unget_wch` function to
+ push a wide character so the next :meth:`~curses.window.get_wch` will return
+ it
+
+(Contributed by Iñigo Serna in :issue:`6755`)
+
+datetime
+--------
+
+ * Equality comparisons between naive and aware :class:`~datetime.datetime`
+ instances now return :const:`False` instead of raising :exc:`TypeError`
+ (:issue:`15006`).
+ * New :meth:`datetime.datetime.timestamp` method: Return POSIX timestamp
+ corresponding to the :class:`~datetime.datetime` instance.
+ * The :meth:`datetime.datetime.strftime` method supports formatting years
+ older than 1000.
+ * The :meth:`datetime.datetime.astimezone` method can now be
+ called without arguments to convert a datetime instance to the system
+ timezone.
+
+
+.. _new-decimal:
+
+decimal
+-------
+
+:issue:`7652` - integrate fast native decimal arithmetic.
+ C-module and libmpdec written by Stefan Krah.
+
+The new C version of the decimal module integrates the high speed libmpdec
+library for arbitrary precision correctly-rounded decimal floating point
+arithmetic. libmpdec conforms to IBM's General Decimal Arithmetic Specification.
+
+Performance gains range from 10x for database applications to 100x for
+numerically intensive applications. These numbers are expected gains
+for standard precisions used in decimal floating point arithmetic. Since
+the precision is user configurable, the exact figures may vary. For example,
+in integer bignum arithmetic the differences can be significantly higher.
+
+The following table is meant as an illustration. Benchmarks are available
+at http://www.bytereef.org/mpdecimal/quickstart.html.
+
+ +---------+-------------+--------------+-------------+
+ | | decimal.py | _decimal | speedup |
+ +=========+=============+==============+=============+
+ | pi | 42.02s | 0.345s | 120x |
+ +---------+-------------+--------------+-------------+
+ | telco | 172.19s | 5.68s | 30x |
+ +---------+-------------+--------------+-------------+
+ | psycopg | 3.57s | 0.29s | 12x |
+ +---------+-------------+--------------+-------------+
+
+Features
+~~~~~~~~
+
+* The :exc:`~decimal.FloatOperation` signal optionally enables stricter
+ semantics for mixing floats and Decimals.
+
+* If Python is compiled without threads, the C version automatically
+ disables the expensive thread local context machinery. In this case,
+ the variable :data:`~decimal.HAVE_THREADS` is set to False.
+
+API changes
+~~~~~~~~~~~
+
+* The C module has the following context limits, depending on the machine
+ architecture:
+
+ +-------------------+---------------------+------------------------------+
+ | | 32-bit | 64-bit |
+ +===================+=====================+==============================+
+ | :const:`MAX_PREC` | :const:`425000000` | :const:`999999999999999999` |
+ +-------------------+---------------------+------------------------------+
+ | :const:`MAX_EMAX` | :const:`425000000` | :const:`999999999999999999` |
+ +-------------------+---------------------+------------------------------+
+ | :const:`MIN_EMIN` | :const:`-425000000` | :const:`-999999999999999999` |
+ +-------------------+---------------------+------------------------------+
+
+* In the context templates (:class:`~decimal.DefaultContext`,
+ :class:`~decimal.BasicContext` and :class:`~decimal.ExtendedContext`)
+ the magnitude of :attr:`~decimal.Context.Emax` and
+ :attr:`~decimal.Context.Emin` has changed to :const:`999999`.
+
+* The :class:`~decimal.Decimal` constructor in decimal.py does not observe
+ the context limits and converts values with arbitrary exponents or precision
+ exactly. Since the C version has internal limits, the following scheme is
+ used: If possible, values are converted exactly, otherwise
+ :exc:`~decimal.InvalidOperation` is raised and the result is NaN. In the
+ latter case it is always possible to use :meth:`~decimal.Context.create_decimal`
+ in order to obtain a rounded or inexact value.
+
+
+* The power function in decimal.py is always correctly-rounded. In the
+ C version, it is defined in terms of the correctly-rounded
+ :meth:`~decimal.Decimal.exp` and :meth:`~decimal.Decimal.ln` functions,
+ but the final result is only "almost always correctly rounded".
+
+
+* In the C version, the context dictionary containing the signals is a
+ :class:`~collections.abc.MutableMapping`. For speed reasons,
+ :attr:`~decimal.Context.flags` and :attr:`~decimal.Context.traps` always
+ refer to the same :class:`~collections.abc.MutableMapping` that the context
+ was initialized with. If a new signal dictionary is assigned,
+ :attr:`~decimal.Context.flags` and :attr:`~decimal.Context.traps`
+ are updated with the new values, but they do not reference the RHS
+ dictionary.
+
+
+* Pickling a :class:`~decimal.Context` produces a different output in order
+ to have a common interchange format for the Python and C versions.
+
+
+* The order of arguments in the :class:`~decimal.Context` constructor has been
+ changed to match the order displayed by :func:`repr`.
+
+
+* The ``watchexp`` parameter in the :meth:`~decimal.Decimal.quantize` method
+ is deprecated.
+
+
+.. _new-email:
+
+email
+-----
+
+Policy Framework
+~~~~~~~~~~~~~~~~
+
+The email package now has a :mod:`~email.policy` framework. A
+:class:`~email.policy.Policy` is an object with several methods and properties
+that control how the email package behaves. The primary policy for Python 3.3
+is the :class:`~email.policy.Compat32` policy, which provides backward
+compatibility with the email package in Python 3.2. A ``policy`` can be
+specified when an email message is parsed by a :mod:`~email.parser`, or when a
+:class:`~email.message.Message` object is created, or when an email is
+serialized using a :mod:`~email.generator`. Unless overridden, a policy passed
+to a ``parser`` is inherited by all the ``Message`` objects and sub-objects
+created by the ``parser``. By default a ``generator`` will use the policy of
+the ``Message`` object it is serializing. The default policy is
+:data:`~email.policy.compat32`.
+
+The minimum set of controls implemented by all ``policy`` objects are:
+
+ =============== =======================================================
+ max_line_length The maximum length, excluding the linesep character(s),
+ individual lines may have when a ``Message`` is
+ serialized. Defaults to 78.
+
+ linesep The character used to separate individual lines when a
+ ``Message`` is serialized. Defaults to ``\n``.
+
+ cte_type ``7bit`` or ``8bit``. ``8bit`` applies only to a
+ ``Bytes`` ``generator``, and means that non-ASCII may
+ be used where allowed by the protocol (or where it
+ exists in the original input).
+
+ raise_on_defect Causes a ``parser`` to raise error when defects are
+ encountered instead of adding them to the ``Message``
+ object's ``defects`` list.
+ =============== =======================================================
+
+A new policy instance, with new settings, is created using the
+:meth:`~email.policy.Policy.clone` method of policy objects. ``clone`` takes
+any of the above controls as keyword arguments. Any control not specified in
+the call retains its default value. Thus you can create a policy that uses
+``\r\n`` linesep characters like this::
+
+ mypolicy = compat32.clone(linesep='\r\n')
+
+Policies can be used to make the generation of messages in the format needed by
+your application simpler. Instead of having to remember to specify
+``linesep='\r\n'`` in all the places you call a ``generator``, you can specify
+it once, when you set the policy used by the ``parser`` or the ``Message``,
+whichever your program uses to create ``Message`` objects. On the other hand,
+if you need to generate messages in multiple forms, you can still specify the
+parameters in the appropriate ``generator`` call. Or you can have custom
+policy instances for your different cases, and pass those in when you create
+the ``generator``.
+
+
+Provisional Policy with New Header API
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+While the policy framework is worthwhile all by itself, the main motivation for
+introducing it is to allow the creation of new policies that implement new
+features for the email package in a way that maintains backward compatibility
+for those who do not use the new policies. Because the new policies introduce a
+new API, we are releasing them in Python 3.3 as a :term:`provisional policy
+<provisional package>`. Backwards incompatible changes (up to and including
+removal of the code) may occur if deemed necessary by the core developers.
+
+The new policies are instances of :class:`~email.policy.EmailPolicy`,
+and add the following additional controls:
+
+ =============== =======================================================
+ refold_source Controls whether or not headers parsed by a
+ :mod:`~email.parser` are refolded by the
+ :mod:`~email.generator`. It can be ``none``, ``long``,
+ or ``all``. The default is ``long``, which means that
+ source headers with a line longer than
+ ``max_line_length`` get refolded. ``none`` means no
+ lines get refolded, and ``all`` means that all lines
+ get refolded.
+
+ header_factory A callable that takes a ``name`` and ``value`` and
+ produces a custom header object.
+ =============== =======================================================
+
+The ``header_factory`` is the key to the new features provided by the new
+policies. When one of the new policies is used, any header retrieved from
+a ``Message`` object is an object produced by the ``header_factory``, and any
+time you set a header on a ``Message`` it becomes an object produced by
+``header_factory``. All such header objects have a ``name`` attribute equal
+to the header name. Address and Date headers have additional attributes
+that give you access to the parsed data of the header. This means you can now
+do things like this::
+
+ >>> m = Message(policy=SMTP)
+ >>> m['To'] = 'Éric <foo@example.com>'
+ >>> m['to']
+ 'Éric <foo@example.com>'
+ >>> m['to'].addresses
+ (Address(display_name='Éric', username='foo', domain='example.com'),)
+ >>> m['to'].addresses[0].username
+ 'foo'
+ >>> m['to'].addresses[0].display_name
+ 'Éric'
+ >>> m['Date'] = email.utils.localtime()
+ >>> m['Date'].datetime
+ datetime.datetime(2012, 5, 25, 21, 39, 24, 465484, tzinfo=datetime.timezone(datetime.timedelta(-1, 72000), 'EDT'))
+ >>> m['Date']
+ 'Fri, 25 May 2012 21:44:27 -0400'
+ >>> print(m)
+ To: =?utf-8?q?=C3=89ric?= <foo@example.com>
+ Date: Fri, 25 May 2012 21:44:27 -0400
+
+You will note that the unicode display name is automatically encoded as
+``utf-8`` when the message is serialized, but that when the header is accessed
+directly, you get the unicode version. This eliminates any need to deal with
+the :mod:`email.header` :meth:`~email.header.decode_header` or
+:meth:`~email.header.make_header` functions.
+
+You can also create addresses from parts::
+
+ >>> m['cc'] = [Group('pals', [Address('Bob', 'bob', 'example.com'),
+ ... Address('Sally', 'sally', 'example.com')]),
+ ... Address('Bonzo', addr_spec='bonz@laugh.com')]
+ >>> print(m)
+ To: =?utf-8?q?=C3=89ric?= <foo@example.com>
+ Date: Fri, 25 May 2012 21:44:27 -0400
+ cc: pals: Bob <bob@example.com>, Sally <sally@example.com>;, Bonzo <bonz@laugh.com>
+
+Decoding to unicode is done automatically::
+
+ >>> m2 = message_from_string(str(m))
+ >>> m2['to']
+ 'Éric <foo@example.com>'
+
+When you parse a message, you can use the ``addresses`` and ``groups``
+attributes of the header objects to access the groups and individual
+addresses::
+
+ >>> m2['cc'].addresses
+ (Address(display_name='Bob', username='bob', domain='example.com'), Address(display_name='Sally', username='sally', domain='example.com'), Address(display_name='Bonzo', username='bonz', domain='laugh.com'))
+ >>> m2['cc'].groups
+ (Group(display_name='pals', addresses=(Address(display_name='Bob', username='bob', domain='example.com'), Address(display_name='Sally', username='sally', domain='example.com'))), Group(display_name=None, addresses=(Address(display_name='Bonzo', username='bonz', domain='laugh.com'),)))
+
+In summary, if you use one of the new policies, header manipulation works the
+way it ought to: your application works with unicode strings, and the email
+package transparently encodes and decodes the unicode to and from the RFC
+standard Content Transfer Encodings.
+
+Other API Changes
+~~~~~~~~~~~~~~~~~
+
+New :class:`~email.parser.BytesHeaderParser`, added to the :mod:`~email.parser`
+module to complement :class:`~email.parser.HeaderParser` and complete the Bytes
+API.
+
+New utility functions:
+
+ * :func:`~email.utils.format_datetime`: given a :class:`~datetime.datetime`,
+ produce a string formatted for use in an email header.
+
+ * :func:`~email.utils.parsedate_to_datetime`: given a date string from
+ an email header, convert it into an aware :class:`~datetime.datetime`,
+ or a naive :class:`~datetime.datetime` if the offset is ``-0000``.
+
+ * :func:`~email.utils.localtime`: With no argument, returns the
+ current local time as an aware :class:`~datetime.datetime` using the local
+ :class:`~datetime.timezone`. Given an aware :class:`~datetime.datetime`,
+ converts it into an aware :class:`~datetime.datetime` using the
+ local :class:`~datetime.timezone`.
+
+
+ftplib
+------
+
+* :class:`ftplib.FTP` now accepts a ``source_address`` keyword argument to
+ specify the ``(host, port)`` to use as the source address in the bind call
+ when creating the outgoing socket. (Contributed by Giampaolo Rodolà
+ in :issue:`8594`.)
+
+* The :class:`~ftplib.FTP_TLS` class now provides a new
+ :func:`~ftplib.FTP_TLS.ccc` function to revert control channel back to
+ plaintext. This can be useful to take advantage of firewalls that know how
+ to handle NAT with non-secure FTP without opening fixed ports. (Contributed
+ by Giampaolo Rodolà in :issue:`12139`)
+
+* Added :meth:`ftplib.FTP.mlsd` method which provides a parsable directory
+ listing format and deprecates :meth:`ftplib.FTP.nlst` and
+ :meth:`ftplib.FTP.dir`. (Contributed by Giampaolo Rodolà in :issue:`11072`)
+
+
+functools
+---------
+
+The :func:`functools.lru_cache` decorator now accepts a ``typed`` keyword
+argument (that defaults to ``False``) to ensure that it caches values of
+different types that compare equal in separate cache slots. (Contributed
+by Raymond Hettinger in :issue:`13227`.)
+
+
+gc
+--
+
+It is now possible to register callbacks invoked by the garbage collector
+before and after collection using the new :data:`~gc.callbacks` list.
+
+
+hmac
+----
+
+A new :func:`~hmac.compare_digest` function has been added to prevent side
+channel attacks on digests through timing analysis. (Contributed by Nick
+Coghlan and Christian Heimes in :issue:`15061`)
+
+
+http
+----
+
+:class:`http.server.BaseHTTPRequestHandler` now buffers the headers and writes
+them all at once when :meth:`~http.server.BaseHTTPRequestHandler.end_headers` is
+called. A new method :meth:`~http.server.BaseHTTPRequestHandler.flush_headers`
+can be used to directly manage when the accumulated headers are sent.
+(Contributed by Andrew Schaaf in :issue:`3709`.)
+
+:class:`http.server` now produces valid ``HTML 4.01 strict`` output.
+(Contributed by Ezio Melotti in :issue:`13295`.)
+
+:class:`http.client.HTTPResponse` now has a
+:meth:`~http.client.HTTPResponse.readinto` method, which means it can be used
+as a :class:`io.RawIOBase` class. (Contributed by John Kuhn in
+:issue:`13464`.)
+
+
+html
+----
+
+:class:`html.parser.HTMLParser` is now able to parse broken markup without
+raising errors, therefore the *strict* argument of the constructor and the
+:exc:`~html.parser.HTMLParseError` exception are now deprecated.
+The ability to parse broken markup is the result of a number of bug fixes that
+are also available on the latest bug fix releases of Python 2.7/3.2.
+(Contributed by Ezio Melotti in :issue:`15114`, and :issue:`14538`,
+:issue:`13993`, :issue:`13960`, :issue:`13358`, :issue:`1745761`,
+:issue:`755670`, :issue:`13357`, :issue:`12629`, :issue:`1200313`,
+:issue:`670664`, :issue:`13273`, :issue:`12888`, :issue:`7311`)
+
+A new :data:`~html.entities.html5` dictionary that maps HTML5 named character
+references to the equivalent Unicode character(s) (e.g. ``html5['gt;'] ==
+'>'``) has been added to the :mod:`html.entities` module. The dictionary is
+now also used by :class:`~html.parser.HTMLParser`. (Contributed by Ezio
+Melotti in :issue:`11113` and :issue:`15156`)
+
+
+imaplib
+-------
+
+The :class:`~imaplib.IMAP4_SSL` constructor now accepts an SSLContext
+parameter to control parameters of the secure channel.
+
+(Contributed by Sijin Joseph in :issue:`8808`)
+
+
+inspect
+-------
+
+A new :func:`~inspect.getclosurevars` function has been added. This function
+reports the current binding of all names referenced from the function body and
+where those names were resolved, making it easier to verify correct internal
+state when testing code that relies on stateful closures.
+
+(Contributed by Meador Inge and Nick Coghlan in :issue:`13062`)
+
+A new :func:`~inspect.getgeneratorlocals` function has been added. This
+function reports the current binding of local variables in the generator's
+stack frame, making it easier to verify correct internal state when testing
+generators.
+
+(Contributed by Meador Inge in :issue:`15153`)
+
+io
+--
+
+The :func:`~io.open` function has a new ``'x'`` mode that can be used to
+exclusively create a new file, and raise a :exc:`FileExistsError` if the file
+already exists. It is based on the C11 'x' mode to fopen().
+
+(Contributed by David Townshend in :issue:`12760`)
+
+The constructor of the :class:`~io.TextIOWrapper` class has a new
+*write_through* optional argument. If *write_through* is ``True``, calls to
+:meth:`~io.TextIOWrapper.write` are guaranteed not to be buffered: any data
+written on the :class:`~io.TextIOWrapper` object is immediately handed to its
+underlying binary buffer.
+
+
+itertools
+---------
+
+:func:`~itertools.accumulate` now takes an optional ``func`` argument for
+providing a user-supplied binary function.
+
+
+logging
+-------
+
+The :func:`~logging.basicConfig` function now supports an optional ``handlers``
+argument taking an iterable of handlers to be added to the root logger.
+
+A class level attribute :attr:`~logging.handlers.SysLogHandler.append_nul` has
+been added to :class:`~logging.handlers.SysLogHandler` to allow control of the
+appending of the ``NUL`` (``\000``) byte to syslog records, since for some
+daemons it is required while for others it is passed through to the log.
+
+
+
+math
+----
+
+The :mod:`math` module has a new function, :func:`~math.log2`, which returns
+the base-2 logarithm of *x*.
+
+(Written by Mark Dickinson in :issue:`11888`).
+
+
+mmap
+----
+
+The :meth:`~mmap.mmap.read` method is now more compatible with other file-like
+objects: if the argument is omitted or specified as ``None``, it returns the
+bytes from the current file position to the end of the mapping. (Contributed
+by Petri Lehtinen in :issue:`12021`.)
+
+
+multiprocessing
+---------------
+
+The new :func:`multiprocessing.connection.wait` function allows polling
+multiple objects (such as connections, sockets and pipes) with a timeout.
+(Contributed by Richard Oudkerk in :issue:`12328`.)
+
+:class:`multiprocessing.Connection` objects can now be transferred over
+multiprocessing connections.
+(Contributed by Richard Oudkerk in :issue:`4892`.)
+
+:class:`multiprocessing.Process` now accepts a ``daemon`` keyword argument
+to override the default behavior of inheriting the ``daemon`` flag from
+the parent process (:issue:`6064`).
+
+New attribute :data:`multiprocessing.Process.sentinel` allows a
+program to wait on multiple :class:`~multiprocessing.Process` objects at one
+time using the appropriate OS primitives (for example, :mod:`select` on
+posix systems).
+
+New methods :meth:`multiprocessing.pool.Pool.starmap` and
+:meth:`~multiprocessing.pool.Pool.starmap_async` provide
+:func:`itertools.starmap` equivalents to the existing
+:meth:`multiprocessing.pool.Pool.map` and
+:meth:`~multiprocessing.pool.Pool.map_async` functions. (Contributed by Hynek
+Schlawack in :issue:`12708`.)
+
+
+nntplib
+-------
+
+The :class:`nntplib.NNTP` class now supports the context manager protocol to
+unconditionally consume :exc:`socket.error` exceptions and to close the NNTP
+connection when done::
+
+ >>> from nntplib import NNTP
+ >>> with NNTP('news.gmane.org') as n:
+ ... n.group('gmane.comp.python.committers')
+ ...
+ ('211 1755 1 1755 gmane.comp.python.committers', 1755, 1, 1755, 'gmane.comp.python.committers')
+ >>>
+
+(Contributed by Giampaolo Rodolà in :issue:`9795`)
+
+
+os
+--
+
+* The :mod:`os` module has a new :func:`~os.pipe2` function that makes it
+ possible to create a pipe with :data:`~os.O_CLOEXEC` or
+ :data:`~os.O_NONBLOCK` flags set atomically. This is especially useful to
+ avoid race conditions in multi-threaded programs.
+
+* The :mod:`os` module has a new :func:`~os.sendfile` function which provides
+ an efficient "zero-copy" way for copying data from one file (or socket)
+ descriptor to another. The phrase "zero-copy" refers to the fact that all of
+ the copying of data between the two descriptors is done entirely by the
+ kernel, with no copying of data into userspace buffers. :func:`~os.sendfile`
+ can be used to efficiently copy data from a file on disk to a network socket,
+ e.g. for downloading a file.
+
+ (Patch submitted by Ross Lagerwall and Giampaolo Rodolà in :issue:`10882`.)
+
+* To avoid race conditions like symlink attacks and issues with temporary
+ files and directories, it is more reliable (and also faster) to manipulate
+ file descriptors instead of file names. Python 3.3 enhances existing functions
+ and introduces new functions to work on file descriptors (:issue:`4761`,
+ :issue:`10755` and :issue:`14626`).
+
+ - The :mod:`os` module has a new :func:`~os.fwalk` function similar to
+ :func:`~os.walk` except that it also yields file descriptors referring to the
+ directories visited. This is especially useful to avoid symlink races.
+
+ - The following functions get new optional *dir_fd* (:ref:`paths relative to
+ directory descriptors <dir_fd>`) and/or *follow_symlinks* (:ref:`not
+ following symlinks <follow_symlinks>`):
+ :func:`~os.access`, :func:`~os.chflags`, :func:`~os.chmod`, :func:`~os.chown`,
+ :func:`~os.link`, :func:`~os.lstat`, :func:`~os.mkdir`, :func:`~os.mkfifo`,
+ :func:`~os.mknod`, :func:`~os.open`, :func:`~os.readlink`, :func:`~os.remove`,
+ :func:`~os.rename`, :func:`~os.replace`, :func:`~os.rmdir`, :func:`~os.stat`,
+ :func:`~os.symlink`, :func:`~os.unlink`, :func:`~os.utime`. Platform
+ support for using these parameters can be checked via the sets
+ :data:`os.supports_dir_fd` and :data:`os.supports_follow_symlinks`.
+
+ - The following functions now support a file descriptor for their path argument:
+ :func:`~os.chdir`, :func:`~os.chmod`, :func:`~os.chown`,
+ :func:`~os.execve`, :func:`~os.listdir`, :func:`~os.pathconf`, :func:`~os.path.exists`,
+ :func:`~os.stat`, :func:`~os.statvfs`, :func:`~os.utime`. Platform support
+ for this can be checked via the :data:`os.supports_fd` set.
+
+* :func:`~os.access` accepts an ``effective_ids`` keyword argument to turn on
+ using the effective uid/gid rather than the real uid/gid in the access check.
+ Platform support for this can be checked via the
+ :data:`~os.supports_effective_ids` set.
+
+* The :mod:`os` module has two new functions: :func:`~os.getpriority` and
+ :func:`~os.setpriority`. They can be used to get or set process
+ niceness/priority in a fashion similar to :func:`os.nice` but extended to all
+ processes instead of just the current one.
+
+ (Patch submitted by Giampaolo Rodolà in :issue:`10784`.)
+
+* The new :func:`os.replace` function allows cross-platform renaming of a
+ file with overwriting the destination. With :func:`os.rename`, an existing
+ destination file is overwritten under POSIX, but raises an error under
+ Windows.
+ (Contributed by Antoine Pitrou in :issue:`8828`.)
+
+* The stat family of functions (:func:`~os.stat`, :func:`~os.fstat`,
+ and :func:`~os.lstat`) now support reading a file's timestamps
+ with nanosecond precision. Symmetrically, :func:`~os.utime`
+ can now write file timestamps with nanosecond precision. (Contributed by
+ Larry Hastings in :issue:`14127`.)
+
+* The new :func:`os.get_terminal_size` function queries the size of the
+ terminal attached to a file descriptor. See also
+ :func:`shutil.get_terminal_size`.
+ (Contributed by Zbigniew Jędrzejewski-Szmek in :issue:`13609`.)
+
+.. XXX sort out this mess after beta1
+
+* New functions to support Linux extended attributes (:issue:`12720`):
+ :func:`~os.getxattr`, :func:`~os.listxattr`, :func:`~os.removexattr`,
+ :func:`~os.setxattr`.
+
+* New interface to the scheduler. These functions
+ control how a process is allocated CPU time by the operating system. New
+ functions:
+ :func:`~os.sched_get_priority_max`, :func:`~os.sched_get_priority_min`,
+ :func:`~os.sched_getaffinity`, :func:`~os.sched_getparam`,
+ :func:`~os.sched_getscheduler`, :func:`~os.sched_rr_get_interval`,
+ :func:`~os.sched_setaffinity`, :func:`~os.sched_setparam`,
+ :func:`~os.sched_setscheduler`, :func:`~os.sched_yield`,
+
+* New functions to control the file system:
+
+ * :func:`~os.posix_fadvise`: Announces an intention to access data in a
+ specific pattern thus allowing the kernel to make optimizations.
+ * :func:`~os.posix_fallocate`: Ensures that enough disk space is allocated
+ for a file.
+ * :func:`~os.sync`: Force write of everything to disk.
+
+* Additional new posix functions:
+
+ * :func:`~os.lockf`: Apply, test or remove a POSIX lock on an open file descriptor.
+ * :func:`~os.pread`: Read from a file descriptor at an offset, the file
+ offset remains unchanged.
+ * :func:`~os.pwrite`: Write to a file descriptor from an offset, leaving
+ the file offset unchanged.
+ * :func:`~os.readv`: Read from a file descriptor into a number of writable buffers.
+ * :func:`~os.truncate`: Truncate the file corresponding to *path*, so that
+ it is at most *length* bytes in size.
+ * :func:`~os.waitid`: Wait for the completion of one or more child processes.
+ * :func:`~os.writev`: Write the contents of *buffers* to a file descriptor,
+ where *buffers* is an arbitrary sequence of buffers.
+ * :func:`~os.getgrouplist` (:issue:`9344`): Return list of group ids that
+ specified user belongs to.
+
+* :func:`~os.times` and :func:`~os.uname`: Return type changed from a tuple to
+ a tuple-like object with named attributes.
+
+* Some platforms now support additional constants for the :func:`~os.lseek`
+ function, such as ``os.SEEK_HOLE`` and ``os.SEEK_DATA``.
+
+* New constants :data:`~os.RTLD_LAZY`, :data:`~os.RTLD_NOW`,
+ :data:`~os.RTLD_GLOBAL`, :data:`~os.RTLD_LOCAL`, :data:`~os.RTLD_NODELETE`,
+ :data:`~os.RTLD_NOLOAD`, and :data:`~os.RTLD_DEEPBIND` are available on
+ platforms that support them. These are for use with the
+ :func:`sys.setdlopenflags` function, and supersede the similar constants
+ defined in :mod:`ctypes` and :mod:`DLFCN`. (Contributed by Victor Stinner
+ in :issue:`13226`.)
+
+* :func:`os.symlink` now accepts (and ignores) the ``target_is_directory``
+ keyword argument on non-Windows platforms, to ease cross-platform support.
+
+
+pdb
+---
+
+Tab-completion is now available not only for command names, but also their
+arguments. For example, for the ``break`` command, function and file names
+are completed.
+
+(Contributed by Georg Brandl in :issue:`14210`)
+
+
+pickle
+------
+
+:class:`pickle.Pickler` objects now have an optional
+:attr:`~pickle.Pickler.dispatch_table` attribute that allows per-pickler
+reduction functions to be set.
+
+(Contributed by Richard Oudkerk in :issue:`14166`.)
+
+
+pydoc
+-----
+
+The Tk GUI and the :func:`~pydoc.serve` function have been removed from the
+:mod:`pydoc` module: ``pydoc -g`` and :func:`~pydoc.serve` have been deprecated
+in Python 3.2.
+
+
+re
+--
+
+:class:`str` regular expressions now support ``\u`` and ``\U`` escapes.
+
+(Contributed by Serhiy Storchaka in :issue:`3665`.)
+
+
+sched
+-----
+
+* :meth:`~sched.scheduler.run` now accepts a *blocking* parameter which when
+ set to False makes the method execute the scheduled events due to expire
+ soonest (if any) and then return immediately.
+ This is useful in case you want to use the :class:`~sched.scheduler` in
+ non-blocking applications. (Contributed by Giampaolo Rodolà in :issue:`13449`)
+
+* :class:`~sched.scheduler` class can now be safely used in multi-threaded
+ environments. (Contributed by Josiah Carlson and Giampaolo Rodolà in
+ :issue:`8684`)
+
+* *timefunc* and *delayfunc* parameters of :class:`~sched.scheduler` class
+ constructor are now optional and default to :func:`time.time` and
+ :func:`time.sleep` respectively. (Contributed by Chris Clark in
+ :issue:`13245`)
+
+* :meth:`~sched.scheduler.enter` and :meth:`~sched.scheduler.enterabs`
+ *argument* parameter is now optional. (Contributed by Chris Clark in
+ :issue:`13245`)
+
+* :meth:`~sched.scheduler.enter` and :meth:`~sched.scheduler.enterabs`
+ now accept a *kwargs* parameter. (Contributed by Chris Clark in
+ :issue:`13245`)
+
+
+select
+------
+
+Solaris and derivative platforms have a new class :class:`select.devpoll`
+for high performance asynchronous sockets via :file:`/dev/poll`.
+(Contributed by Jesús Cea Avión in :issue:`6397`.)
+
+
+shlex
+-----
+
+The previously undocumented helper function ``quote`` from the
+:mod:`pipes` module has been moved to the :mod:`shlex` module and
+documented. :func:`~shlex.quote` properly escapes all characters in a string
+that might be otherwise given special meaning by the shell.
+
+
+shutil
+------
+
+* New functions:
+
+ * :func:`~shutil.disk_usage`: provides total, used and free disk space
+ statistics. (Contributed by Giampaolo Rodolà in :issue:`12442`)
+ * :func:`~shutil.chown`: allows one to change user and/or group of the given
+ path also specifying the user/group names and not only their numeric
+ ids. (Contributed by Sandro Tosi in :issue:`12191`)
+ * :func:`shutil.get_terminal_size`: returns the size of the terminal window
+ to which the interpreter is attached. (Contributed by Zbigniew
+ Jędrzejewski-Szmek in :issue:`13609`.)
+
+* :func:`~shutil.copy2` and :func:`~shutil.copystat` now preserve file
+ timestamps with nanosecond precision on platforms that support it.
+ They also preserve file "extended attributes" on Linux. (Contributed
+ by Larry Hastings in :issue:`14127` and :issue:`15238`.)
+
+* Several functions now take an optional ``symlinks`` argument: when that
+ parameter is true, symlinks aren't dereferenced and the operation instead
+ acts on the symlink itself (or creates one, if relevant).
+ (Contributed by Hynek Schlawack in :issue:`12715`.)
+
+* When copying files to a different file system, :func:`~shutil.move` now
+ handles symlinks the way the posix ``mv`` command does, recreating the
+ symlink rather than copying the target file contents. (Contributed by
+ Jonathan Niehof in :issue:`9993`.) :func:`~shutil.move` now also returns
+ the ``dst`` argument as its result.
+
+* :func:`~shutil.rmtree` is now resistant to symlink attacks on platforms
+ which support the new ``dir_fd`` parameter in :func:`os.open` and
+ :func:`os.unlink`. (Contributed by Martin von Löwis and Hynek Schlawack
+ in :issue:`4489`.)
+
+
+signal
+------
+
+* The :mod:`signal` module has new functions:
+
+ * :func:`~signal.pthread_sigmask`: fetch and/or change the signal mask of the
+ calling thread (Contributed by Jean-Paul Calderone in :issue:`8407`) ;
+ * :func:`~signal.pthread_kill`: send a signal to a thread ;
+ * :func:`~signal.sigpending`: examine pending signals ;
+ * :func:`~signal.sigwait`: wait for a signal.
+ * :func:`~signal.sigwaitinfo`: wait for a signal, returning detailed
+ information about it.
+ * :func:`~signal.sigtimedwait`: like :func:`~signal.sigwaitinfo` but with a
+ timeout.
+
+* The signal handler writes the signal number as a single byte instead of
+ a nul byte into the wakeup file descriptor. So it is possible to wait for more
+ than one signal and know which signals were raised.
+
+* :func:`signal.signal` and :func:`signal.siginterrupt` raise an OSError,
+ instead of a RuntimeError: OSError has an errno attribute.
+
+
+smtpd
+-----
+
+The :mod:`smtpd` module now supports :rfc:`5321` (extended SMTP) and :rfc:`1870`
+(size extension). Per the standard, these extensions are enabled if and only
+if the client initiates the session with an ``EHLO`` command.
+
+(Initial ``EHLO`` support by Alberto Trevino. Size extension by Juhana
+Jauhiainen. Substantial additional work on the patch contributed by Michele
+Orrù and Dan Boswell. :issue:`8739`)
+
+
+smtplib
+-------
+
+The :class:`~smtplib.SMTP`, :class:`~smtplib.SMTP_SSL`, and
+:class:`~smtplib.LMTP` classes now accept a ``source_address`` keyword argument
+to specify the ``(host, port)`` to use as the source address in the bind call
+when creating the outgoing socket. (Contributed by Paulo Scardine in
+:issue:`11281`.)
+
+:class:`~smtplib.SMTP` now supports the context manager protocol, allowing an
+``SMTP`` instance to be used in a ``with`` statement. (Contributed
+by Giampaolo Rodolà in :issue:`11289`.)
+
+The :class:`~smtplib.SMTP_SSL` constructor and the :meth:`~smtplib.SMTP.starttls`
+method now accept an SSLContext parameter to control parameters of the secure
+channel. (Contributed by Kasun Herath in :issue:`8809`)
+
+
+socket
+------
+
+* The :class:`~socket.socket` class now exposes additional methods to process
+ ancillary data when supported by the underlying platform:
+
+ * :func:`~socket.socket.sendmsg`
+ * :func:`~socket.socket.recvmsg`
+ * :func:`~socket.socket.recvmsg_into`
+
+ (Contributed by David Watson in :issue:`6560`, based on an earlier patch by
+ Heiko Wundram)
+
+* The :class:`~socket.socket` class now supports the PF_CAN protocol family
+ (http://en.wikipedia.org/wiki/Socketcan), on Linux
+ (http://lwn.net/Articles/253425).
+
+ (Contributed by Matthias Fuchs, updated by Tiago Gonçalves in :issue:`10141`)
+
+* The :class:`~socket.socket` class now supports the PF_RDS protocol family
+ (http://en.wikipedia.org/wiki/Reliable_Datagram_Sockets and
+ http://oss.oracle.com/projects/rds/).
+
+* The :class:`~socket.socket` class now supports the ``PF_SYSTEM`` protocol
+ family on OS X. (Contributed by Michael Goderbauer in :issue:`13777`.)
+
+* New function :func:`~socket.sethostname` allows the hostname to be set
+ on unix systems if the calling process has sufficient privileges.
+ (Contributed by Ross Lagerwall in :issue:`10866`.)
+
+
+socketserver
+------------
+
+:class:`~socketserver.BaseServer` now has an overridable method
+:meth:`~socketserver.BaseServer.service_actions` that is called by the
+:meth:`~socketserver.BaseServer.serve_forever` method in the service loop.
+:class:`~socketserver.ForkingMixIn` now uses this to clean up zombie
+child processes. (Contributed by Justin Warkentin in :issue:`11109`.)
+
+
+sqlite3
+-------
+
+New :class:`sqlite3.Connection` method
+:meth:`~sqlite3.Connection.set_trace_callback` can be used to capture a trace of
+all sql commands processed by sqlite. (Contributed by Torsten Landschoff
+in :issue:`11688`.)
+
+
+ssl
+---
+
+* The :mod:`ssl` module has two new random generation functions:
+
+ * :func:`~ssl.RAND_bytes`: generate cryptographically strong
+ pseudo-random bytes.
+ * :func:`~ssl.RAND_pseudo_bytes`: generate pseudo-random bytes.
+
+ (Contributed by Victor Stinner in :issue:`12049`)
+
+* The :mod:`ssl` module now exposes a finer-grained exception hierarchy
+ in order to make it easier to inspect the various kinds of errors.
+ (Contributed by Antoine Pitrou in :issue:`11183`)
+
+* :meth:`~ssl.SSLContext.load_cert_chain` now accepts a *password* argument
+ to be used if the private key is encrypted.
+ (Contributed by Adam Simpkins in :issue:`12803`)
+
+* Diffie-Hellman key exchange, both regular and Elliptic Curve-based, is
+ now supported through the :meth:`~ssl.SSLContext.load_dh_params` and
+ :meth:`~ssl.SSLContext.set_ecdh_curve` methods.
+ (Contributed by Antoine Pitrou in :issue:`13626` and :issue:`13627`)
+
+* SSL sockets have a new :meth:`~ssl.SSLSocket.get_channel_binding` method
+ allowing the implementation of certain authentication mechanisms such as
+ SCRAM-SHA-1-PLUS. (Contributed by Jacek Konieczny in :issue:`12551`)
+
+* You can query the SSL compression algorithm used by an SSL socket, thanks
+ to its new :meth:`~ssl.SSLSocket.compression` method. The new attribute
+ :attr:`~ssl.OP_NO_COMPRESSION` can be used to disable compression.
+ (Contributed by Antoine Pitrou in :issue:`13634`)
+
+* Support has been added for the Next Protocol Negotiation extension using
+ the :meth:`ssl.SSLContext.set_npn_protocols` method.
+ (Contributed by Colin Marc in :issue:`14204`)
+
+* SSL errors can now be introspected more easily thanks to
+ :attr:`~ssl.SSLError.library` and :attr:`~ssl.SSLError.reason` attributes.
+ (Contributed by Antoine Pitrou in :issue:`14837`)
+
+* The :func:`~ssl.get_server_certificate` function now supports IPv6.
+ (Contributed by Charles-François Natali in :issue:`11811`.)
+
+* New attribute :attr:`~ssl.OP_CIPHER_SERVER_PREFERENCE` allows setting
+ SSLv3 server sockets to use the server's cipher ordering preference rather
+ than the client's (:issue:`13635`).
+
+
+stat
+----
+
+The undocumented tarfile.filemode function has been moved to
+:func:`stat.filemode`. It can be used to convert a file's mode to a string of
+the form '-rwxrwxrwx'.
+
+(Contributed by Giampaolo Rodolà in :issue:`14807`)
+
+
+struct
+------
+
+The :mod:`struct` module now supports ``ssize_t`` and ``size_t`` via the
+new codes ``n`` and ``N``, respectively. (Contributed by Antoine Pitrou
+in :issue:`3163`.)
+
+
+subprocess
+----------
+
+Command strings can now be bytes objects on posix platforms. (Contributed by
+Victor Stinner in :issue:`8513`.)
+
+A new constant :data:`~subprocess.DEVNULL` allows suppressing output in a
+platform-independent fashion. (Contributed by Ross Lagerwall in
+:issue:`5870`.)
+
+
+sys
+---
+
+The :mod:`sys` module has a new :data:`~sys.thread_info` :term:`struct
+sequence` holding information about the thread implementation
+(:issue:`11223`).
+
+
+tarfile
+-------
+
+:mod:`tarfile` now supports ``lzma`` encoding via the :mod:`lzma` module.
+(Contributed by Lars Gustäbel in :issue:`5689`.)
+
+
+tempfile
+--------
+
+:class:`tempfile.SpooledTemporaryFile`\'s
+:meth:`~tempfile.SpooledTemporaryFile.truncate` method now accepts
+a ``size`` parameter. (Contributed by Ryan Kelly in :issue:`9957`.)
+
+
+textwrap
+--------
+
+The :mod:`textwrap` module has a new :func:`~textwrap.indent` that makes
+it straightforward to add a common prefix to selected lines in a block
+of text (:issue:`13857`).
+
+
+threading
+---------
+
+:class:`threading.Condition`, :class:`threading.Semaphore`,
+:class:`threading.BoundedSemaphore`, :class:`threading.Event`, and
+:class:`threading.Timer`, all of which used to be factory functions returning a
+class instance, are now classes and may be subclassed. (Contributed by Éric
+Araujo in :issue:`10968`).
+
+The :class:`threading.Thread` constructor now accepts a ``daemon`` keyword
+argument to override the default behavior of inheriting the ``daemon`` flag
+value from the parent thread (:issue:`6064`).
+
+The formerly private function ``_thread.get_ident`` is now available as the
+public function :func:`threading.get_ident`. This eliminates several cases of
+direct access to the ``_thread`` module in the stdlib. Third party code that
+used ``_thread.get_ident`` should likewise be changed to use the new public
+interface.
+
+
+time
+----
+
+The :pep:`418` added new functions to the :mod:`time` module:
+
+* :func:`~time.get_clock_info`: Get information on a clock.
+* :func:`~time.monotonic`: Monotonic clock (cannot go backward), not affected
+ by system clock updates.
+* :func:`~time.perf_counter`: Performance counter with the highest available
+ resolution to measure a short duration.
+* :func:`~time.process_time`: Sum of the system and user CPU time of the
+ current process.
+
+Other new functions:
+
+* :func:`~time.clock_getres`, :func:`~time.clock_gettime` and
+ :func:`~time.clock_settime` functions with ``CLOCK_xxx`` constants.
+ (Contributed by Victor Stinner in :issue:`10278`)
+
+To improve cross platform consistency, :func:`~time.sleep` now raises a
+:exc:`ValueError` when passed a negative sleep value. Previously this was an
+error on posix, but produced an infinite sleep on Windows.
+
+
+types
+-----
+
+Add a new :class:`types.MappingProxyType` class: Read-only proxy of a mapping.
+(:issue:`14386`)
+
+
+The new functions :func:`types.new_class` and :func:`types.prepare_class`
+provide support for :pep:`3115` compliant dynamic type creation.
+(:issue:`14588`)
+
+
+unittest
+--------
+
+:meth:`.assertRaises`, :meth:`.assertRaisesRegex`, :meth:`.assertWarns`, and
+:meth:`.assertWarnsRegex` now accept a keyword argument *msg* when used as
+context managers. (Contributed by Ezio Melotti and Winston Ewert in
+:issue:`10775`)
+
+:meth:`unittest.TestCase.run` now returns the :class:`~unittest.TestResult`
+object.
+
+
+urllib
+------
+
+The :class:`~urllib.request.Request` class now accepts a *method* argument
+used by :meth:`~urllib.request.Request.get_method` to determine what HTTP method
+should be used. For example, this will send a ``'HEAD'`` request::
+
+ >>> urlopen(Request('http://www.python.org', method='HEAD'))
+
+(:issue:`1673007`)
+
+
+webbrowser
+----------
+
+The :mod:`webbrowser` module supports more "browsers": Google Chrome (named
+:program:`chrome`, :program:`chromium`, :program:`chrome-browser` or
+:program:`chromium-browser` depending on the version and operating system),
+and the generic launchers :program:`xdg-open`, from the FreeDesktop.org
+project, and :program:`gvfs-open`, which is the default URI handler for GNOME
+3. (The former contributed by Arnaud Calmettes in :issue:`13620`, the latter
+by Matthias Klose in :issue:`14493`)
+
+
+xml.etree.ElementTree
+---------------------
+
+The :mod:`xml.etree.ElementTree` module now imports its C accelerator by
+default; there is no longer a need to explicitly import
+:mod:`xml.etree.cElementTree` (this module stays for backwards compatibility,
+but is now deprecated). In addition, the ``iter`` family of methods of
+:class:`~xml.etree.ElementTree.Element` has been optimized (rewritten in C).
+The module's documentation has also been greatly improved with added examples
+and a more detailed reference.
+
+
+zlib
+----
+
+New attribute :attr:`zlib.Decompress.eof` makes it possible to distinguish
+between a properly-formed compressed stream and an incomplete or truncated one.
+(Contributed by Nadeem Vawda in :issue:`12646`.)
+
+New attribute :attr:`zlib.ZLIB_RUNTIME_VERSION` reports the version string of
+the underlying ``zlib`` library that is loaded at runtime. (Contributed by
+Torsten Landschoff in :issue:`12306`.)
+
+
+Optimizations
+=============
+
+Major performance enhancements have been added:
+
+* Thanks to :pep:`393`, some operations on Unicode strings have been optimized:
+
+ * the memory footprint is divided by 2 to 4 depending on the text
+ * encoding an ASCII string to UTF-8 doesn't need to encode characters anymore,
+ the UTF-8 representation is shared with the ASCII representation
+ * the UTF-8 encoder has been optimized
+ * repeating a single ASCII letter and getting a substring of an ASCII string
+ is 4 times faster
+
+* UTF-8 is now 2x to 4x faster. UTF-16 encoding is now up to 10x faster.
+
+ (contributed by Serhiy Storchaka, :issue:`14624`, :issue:`14738` and
+ :issue:`15026`.)
+
+
+Build and C API Changes
+=======================
+
+Changes to Python's build process and to the C API include:
+
+* New :pep:`3118` related function:
+
+ * :c:func:`PyMemoryView_FromMemory`
+
+* :pep:`393` added new Unicode types, macros and functions:
+
+ * High-level API:
+
+ * :c:func:`PyUnicode_CopyCharacters`
+ * :c:func:`PyUnicode_FindChar`
+ * :c:func:`PyUnicode_GetLength`, :c:macro:`PyUnicode_GET_LENGTH`
+ * :c:func:`PyUnicode_New`
+ * :c:func:`PyUnicode_Substring`
+ * :c:func:`PyUnicode_ReadChar`, :c:func:`PyUnicode_WriteChar`
+
+ * Low-level API:
+
+ * :c:type:`Py_UCS1`, :c:type:`Py_UCS2`, :c:type:`Py_UCS4` types
+ * :c:type:`PyASCIIObject` and :c:type:`PyCompactUnicodeObject` structures
+ * :c:macro:`PyUnicode_READY`
+ * :c:func:`PyUnicode_FromKindAndData`
+ * :c:func:`PyUnicode_AsUCS4`, :c:func:`PyUnicode_AsUCS4Copy`
+ * :c:macro:`PyUnicode_DATA`, :c:macro:`PyUnicode_1BYTE_DATA`,
+ :c:macro:`PyUnicode_2BYTE_DATA`, :c:macro:`PyUnicode_4BYTE_DATA`
+ * :c:macro:`PyUnicode_KIND` with :c:type:`PyUnicode_Kind` enum:
+ :c:data:`PyUnicode_WCHAR_KIND`, :c:data:`PyUnicode_1BYTE_KIND`,
+ :c:data:`PyUnicode_2BYTE_KIND`, :c:data:`PyUnicode_4BYTE_KIND`
+ * :c:macro:`PyUnicode_READ`, :c:macro:`PyUnicode_READ_CHAR`, :c:macro:`PyUnicode_WRITE`
+ * :c:macro:`PyUnicode_MAX_CHAR_VALUE`
+
+* :c:macro:`PyArg_ParseTuple` now accepts a :class:`bytearray` for the ``c``
+ format (:issue:`12380`).
+
+
+
+Deprecated
+==========
+
+Unsupported Operating Systems
+-----------------------------
+
+OS/2 and VMS are no longer supported due to the lack of a maintainer.
+
+Windows 2000 and Windows platforms which set ``COMSPEC`` to ``command.com``
+are no longer supported due to maintenance burden.
+
+OSF support, which was deprecated in 3.2, has been completely removed.
+
+
+Deprecated Python modules, functions and methods
+------------------------------------------------
+
+* Passing a non-empty string to ``object.__format__()`` is deprecated, and
+ will produce a :exc:`TypeError` in Python 3.4 (:issue:`9856`).
+* The ``unicode_internal`` codec has been deprecated because of the
+ :pep:`393`, use UTF-8, UTF-16 (``utf-16-le`` or ``utf-16-be``), or UTF-32
+ (``utf-32-le`` or ``utf-32-be``)
+* :meth:`ftplib.FTP.nlst` and :meth:`ftplib.FTP.dir`: use
+ :meth:`ftplib.FTP.mlsd`
+* :func:`platform.popen`: use the :mod:`subprocess` module. Check especially
+ the :ref:`subprocess-replacements` section (:issue:`11377`).
+* :issue:`13374`: The Windows bytes API has been deprecated in the :mod:`os`
+ module. Use Unicode filenames, instead of bytes filenames, to not depend on
+ the ANSI code page anymore and to support any filename.
+* :issue:`13988`: The :mod:`xml.etree.cElementTree` module is deprecated. The
+ accelerator is used automatically whenever available.
+* The behaviour of :func:`time.clock` depends on the platform: use the new
+ :func:`time.perf_counter` or :func:`time.process_time` function instead,
+ depending on your requirements, to have a well defined behaviour.
+* The :func:`os.stat_float_times` function is deprecated.
+* :mod:`abc` module:
+
+ * :class:`abc.abstractproperty` has been deprecated, use :class:`property`
+ with :func:`abc.abstractmethod` instead.
+ * :class:`abc.abstractclassmethod` has been deprecated, use
+ :class:`classmethod` with :func:`abc.abstractmethod` instead.
+ * :class:`abc.abstractstaticmethod` has been deprecated, use
+ :class:`staticmethod` with :func:`abc.abstractmethod` instead.
+
+* :mod:`importlib` package:
+
+ * :meth:`importlib.abc.SourceLoader.path_mtime` is now deprecated in favour of
+ :meth:`importlib.abc.SourceLoader.path_stats` as bytecode files now store
+ both the modification time and size of the source file the bytecode file was
+ compiled from.
+
+
+
+
+
+Deprecated functions and types of the C API
+-------------------------------------------
+
+The :c:type:`Py_UNICODE` has been deprecated by :pep:`393` and will be
+removed in Python 4. All functions using this type are deprecated:
+
+Unicode functions and methods using :c:type:`Py_UNICODE` and
+:c:type:`Py_UNICODE*` types:
+
+* :c:macro:`PyUnicode_FromUnicode`: use :c:func:`PyUnicode_FromWideChar` or
+ :c:func:`PyUnicode_FromKindAndData`
+* :c:macro:`PyUnicode_AS_UNICODE`, :c:func:`PyUnicode_AsUnicode`,
+ :c:func:`PyUnicode_AsUnicodeAndSize`: use :c:func:`PyUnicode_AsWideCharString`
+* :c:macro:`PyUnicode_AS_DATA`: use :c:macro:`PyUnicode_DATA` with
+ :c:macro:`PyUnicode_READ` and :c:macro:`PyUnicode_WRITE`
+* :c:macro:`PyUnicode_GET_SIZE`, :c:func:`PyUnicode_GetSize`: use
+ :c:macro:`PyUnicode_GET_LENGTH` or :c:func:`PyUnicode_GetLength`
+* :c:macro:`PyUnicode_GET_DATA_SIZE`: use
+ ``PyUnicode_GET_LENGTH(str) * PyUnicode_KIND(str)`` (only works on ready
+ strings)
+* :c:func:`PyUnicode_AsUnicodeCopy`: use :c:func:`PyUnicode_AsUCS4Copy` or
+ :c:func:`PyUnicode_AsWideCharString`
+* :c:func:`PyUnicode_GetMax`
+
+
+Functions and macros manipulating Py_UNICODE* strings:
+
+* :c:macro:`Py_UNICODE_strlen`: use :c:func:`PyUnicode_GetLength` or
+ :c:macro:`PyUnicode_GET_LENGTH`
+* :c:macro:`Py_UNICODE_strcat`: use :c:func:`PyUnicode_CopyCharacters` or
+ :c:func:`PyUnicode_FromFormat`
+* :c:macro:`Py_UNICODE_strcpy`, :c:macro:`Py_UNICODE_strncpy`,
+ :c:macro:`Py_UNICODE_COPY`: use :c:func:`PyUnicode_CopyCharacters` or
+ :c:func:`PyUnicode_Substring`
+* :c:macro:`Py_UNICODE_strcmp`: use :c:func:`PyUnicode_Compare`
+* :c:macro:`Py_UNICODE_strncmp`: use :c:func:`PyUnicode_Tailmatch`
+* :c:macro:`Py_UNICODE_strchr`, :c:macro:`Py_UNICODE_strrchr`: use
+ :c:func:`PyUnicode_FindChar`
+* :c:macro:`Py_UNICODE_FILL`: use :c:func:`PyUnicode_Fill`
+* :c:macro:`Py_UNICODE_MATCH`
+
+Encoders:
+
+* :c:func:`PyUnicode_Encode`: use :c:func:`PyUnicode_AsEncodedObject`
+* :c:func:`PyUnicode_EncodeUTF7`
+* :c:func:`PyUnicode_EncodeUTF8`: use :c:func:`PyUnicode_AsUTF8` or
+ :c:func:`PyUnicode_AsUTF8String`
+* :c:func:`PyUnicode_EncodeUTF32`
+* :c:func:`PyUnicode_EncodeUTF16`
+* :c:func:`PyUnicode_EncodeUnicodeEscape`: use
+ :c:func:`PyUnicode_AsUnicodeEscapeString`
+* :c:func:`PyUnicode_EncodeRawUnicodeEscape`: use
+ :c:func:`PyUnicode_AsRawUnicodeEscapeString`
+* :c:func:`PyUnicode_EncodeLatin1`: use :c:func:`PyUnicode_AsLatin1String`
+* :c:func:`PyUnicode_EncodeASCII`: use :c:func:`PyUnicode_AsASCIIString`
+* :c:func:`PyUnicode_EncodeCharmap`
+* :c:func:`PyUnicode_TranslateCharmap`
+* :c:func:`PyUnicode_EncodeMBCS`: use :c:func:`PyUnicode_AsMBCSString` or
+ :c:func:`PyUnicode_EncodeCodePage` (with ``CP_ACP`` code_page)
+* :c:func:`PyUnicode_EncodeDecimal`,
+ :c:func:`PyUnicode_TransformDecimalToASCII`
+
+
+Deprecated features
+-------------------
+
+The :mod:`array` module's ``'u'`` format code is now deprecated and will be
+removed in Python 4 together with the rest of the (:c:type:`Py_UNICODE`) API.
+
+
+Porting to Python 3.3
+=====================
+
+This section lists previously described changes and other bugfixes
+that may require changes to your code.
+
+.. _portingpythoncode:
+
+Porting Python code
+-------------------
+
+* Hash randomization is enabled by default. Set the :envvar:`PYTHONHASHSEED`
+ environment variable to ``0`` to disable hash randomization. See also the
+ :meth:`object.__hash__` method.
+
+* :issue:`12326`: On Linux, sys.platform doesn't contain the major version
+ anymore. It is now always 'linux', instead of 'linux2' or 'linux3' depending
+ on the Linux version used to build Python. Replace sys.platform == 'linux2'
+ with sys.platform.startswith('linux'), or directly sys.platform == 'linux' if
+ you don't need to support older Python versions.
+
+* :issue:`13847`, :issue:`14180`: :mod:`time` and :mod:`datetime`:
+ :exc:`OverflowError` is now raised instead of :exc:`ValueError` if a
+ timestamp is out of range. :exc:`OSError` is now raised if C functions
+ :c:func:`gmtime` or :c:func:`localtime` failed.
+
+* The default finders used by import now utilize a cache of what is contained
+ within a specific directory. If you create a Python source file or sourceless
+ bytecode file, make sure to call :func:`importlib.invalidate_caches` to clear
+ out the cache for the finders to notice the new file.
+
+* :exc:`ImportError` now uses the full name of the module that was attempted to
+ be imported. Doctests that check ImportErrors' message will need to be
+ updated to use the full name of the module instead of just the tail of the
+ name.
+
+* The *index* argument to :func:`__import__` now defaults to 0 instead of -1
+ and no longer supports negative values. It was an oversight when :pep:`328` was
+ implemented that the default value remained -1. If you need to continue to
+ perform a relative import followed by an absolute import, then perform the
+ relative import using an index of 1, followed by another import using an
+ index of 0. It is preferred, though, that you use
+ :func:`importlib.import_module` rather than call :func:`__import__` directly.
+
+* :func:`__import__` no longer allows one to use an index value other than 0
+ for top-level modules. E.g. ``__import__('sys', level=1)`` is now an error.
+
+* Because :attr:`sys.meta_path` and :attr:`sys.path_hooks` now have finders on
+ them by default, you will most likely want to use :meth:`list.insert` instead
+ of :meth:`list.append` to add to those lists.
+
+* Because ``None`` is now inserted into :attr:`sys.path_importer_cache`, if you
+ are clearing out entries in the dictionary of paths that do not have a
+ finder, you will need to remove keys paired with values of ``None`` **and**
+ :class:`imp.NullImporter` to be backwards-compatible. This will lead to extra
+ overhead on older versions of Python that re-insert ``None`` into
+ :attr:`sys.path_importer_cache` where it represents the use of implicit
+ finders, but semantically it should not change anything.
+
+* :class:`importlib.abc.Finder` no longer specifies a ``find_module()`` abstract
+ method that must be implemented. If you were relying on subclasses to
+ implement that method, make sure to check for the method's existence first.
+ You will probably want to check for ``find_loader()`` first, though, in the
+ case of working with :term:`path entry finders <path entry finder>`.
+
+* :mod:`pkgutil` has been converted to use :mod:`importlib` internally. This
+ eliminates many edge cases where the old behaviour of the PEP 302 import
+ emulation failed to match the behaviour of the real import system. The
+ import emulation itself is still present, but is now deprecated. The
+ :func:`pkgutil.iter_importers` and :func:`pkgutil.walk_packages` functions
+ special case the standard import hooks so they are still supported even
+ though they do not provide the non-standard ``iter_modules()`` method.
+
+* A longstanding RFC-compliance bug (:issue:`1079`) in the parsing done by
+ :func:`email.header.decode_header` has been fixed. Code that uses the
+ standard idiom to convert encoded headers into unicode
+ (``str(make_header(decode_header(h)))``) will see no change, but code that
+ looks at the individual tuples returned by decode_header will see that
+ whitespace that precedes or follows ``ASCII`` sections is now included in the
+ ``ASCII`` section. Code that builds headers using ``make_header`` should
+ also continue to work without change, since ``make_header`` continues to add
+ whitespace between ``ASCII`` and non-``ASCII`` sections if it is not already
+ present in the input strings.
+
+* :func:`email.utils.formataddr` now does the correct content transfer
+ encoding when passed non-``ASCII`` display names. Any code that depended on
+ the previous buggy behavior that preserved the non-``ASCII`` unicode in the
+ formatted output string will need to be changed (:issue:`1690608`).
+
+* :meth:`poplib.POP3.quit` may now raise protocol errors like all other
+ ``poplib`` methods. Code that assumes ``quit`` does not raise
+ :exc:`poplib.error_proto` errors may need to be changed if errors on ``quit``
+ are encountered by a particular application (:issue:`11291`).
+
+* The ``strict`` argument to :class:`email.parser.Parser`, deprecated since
+ Python 2.4, has finally been removed.
+
+* The deprecated method ``unittest.TestCase.assertSameElements`` has been
+ removed.
+
+* The deprecated variable ``time.accept2dyear`` has been removed.
+
+* The deprecated ``Context._clamp`` attribute has been removed from the
+ :mod:`decimal` module. It was previously replaced by the public attribute
+ :attr:`~decimal.Context.clamp`. (See :issue:`8540`.)
+
+* The undocumented internal helper class ``SSLFakeFile`` has been removed
+ from :mod:`smtplib`, since its functionality has long been provided directly
+ by :meth:`socket.socket.makefile`.
+
+* Passing a negative value to :func:`time.sleep` on Windows now raises an
+ error instead of sleeping forever. It has always raised an error on posix.
+
+* The ``ast.__version__`` constant has been removed. If you need to
+ make decisions affected by the AST version, use :attr:`sys.version_info`
+ to make the decision.
+
+* Code that used to work around the fact that the :mod:`threading` module used
+ factory functions by subclassing the private classes will need to change to
+ subclass the now-public classes.
+
+* The undocumented debugging machinery in the threading module has been
+ removed, simplifying the code. This should have no effect on production
+ code, but is mentioned here in case any application debug frameworks were
+ interacting with it (:issue:`13550`).
+
+
+Porting C code
+--------------
+
+* In the course of changes to the buffer API the undocumented
+ :c:member:`~Py_buffer.smalltable` member of the
+ :c:type:`Py_buffer` structure has been removed and the
+ layout of the :c:type:`PyMemoryViewObject` has changed.
+
+ All extensions relying on the relevant parts in ``memoryobject.h``
+ or ``object.h`` must be rebuilt.
+
+* Due to :ref:`PEP 393 <pep-393>`, the :c:type:`Py_UNICODE` type and all
+ functions using this type are deprecated (but will stay available for
+ at least five years). If you were using low-level Unicode APIs to
+ construct and access unicode objects and you want to benefit from the
+ memory footprint reduction provided by PEP 393, you have to convert
+ your code to the new :doc:`Unicode API <../c-api/unicode>`.
+
+ However, if you only have been using high-level functions such as
+ :c:func:`PyUnicode_Concat()`, :c:func:`PyUnicode_Join` or
+ :c:func:`PyUnicode_FromFormat()`, your code will automatically take
+ advantage of the new unicode representations.
+
+* :c:func:`PyImport_GetMagicNumber` now returns -1 upon failure.
+
+* As a negative value for the *level* argument to :func:`__import__` is no
+ longer valid, the same now holds for :c:func:`PyImport_ImportModuleLevel`.
+ This also means that the value of *level* used by
+ :c:func:`PyImport_ImportModuleEx` is now 0 instead of -1.
+
+
+Building C extensions
+---------------------
+
+* The range of possible file names for C extensions has been narrowed.
+ Very rarely used spellings have been suppressed: under POSIX, files
+ named ``xxxmodule.so``, ``xxxmodule.abi3.so`` and
+ ``xxxmodule.cpython-*.so`` are no longer recognized as implementing
+ the ``xxx`` module. If you had been generating such files, you have
+ to switch to the other spellings (i.e., remove the ``module`` string
+ from the file names).
+
+ (implemented in :issue:`14040`.)
+
+
+Command Line Switch Changes
+---------------------------
+
+* The -Q command-line flag and related artifacts have been removed. Code
+ checking sys.flags.division_warning will need updating.
+
+ (:issue:`10998`, contributed by Éric Araujo.)
+
+* When :program:`python` is started with :option:`-S`, ``import site``
+ will no longer add site-specific paths to the module search paths. In
+ previous versions, it did.
+
+ (:issue:`11591`, contributed by Carl Meyer with editions by Éric Araujo.)
diff --git a/Doc/whatsnew/changelog.rst b/Doc/whatsnew/changelog.rst
new file mode 100644
index 0000000..57e2dab
--- /dev/null
+++ b/Doc/whatsnew/changelog.rst
@@ -0,0 +1,6 @@
++++++++++
+Changelog
++++++++++
+
+.. miscnews:: ../../Misc/NEWS
+
diff --git a/Doc/whatsnew/index.rst b/Doc/whatsnew/index.rst
index 8220bd2..bc1206b 100644
--- a/Doc/whatsnew/index.rst
+++ b/Doc/whatsnew/index.rst
@@ -11,6 +11,7 @@ anyone wishing to stay up-to-date after a new release.
.. toctree::
:maxdepth: 2
+ 3.3.rst
3.2.rst
3.1.rst
3.0.rst
@@ -22,3 +23,11 @@ anyone wishing to stay up-to-date after a new release.
2.2.rst
2.1.rst
2.0.rst
+
+The "Changelog" is an HTML version of the file :source:`Misc/NEWS` which
+contains *all* nontrivial changes to Python for the current version.
+
+.. toctree::
+ :maxdepth: 2
+
+ changelog.rst
diff --git a/Grammar/Grammar b/Grammar/Grammar
index cea68de..d7aaffd 100644
--- a/Grammar/Grammar
+++ b/Grammar/Grammar
@@ -13,7 +13,7 @@
# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
-# eval_input is the input for the eval() and input() functions.
+# eval_input is the input for the eval() function.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
@@ -129,4 +129,5 @@ comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
-yield_expr: 'yield' [testlist]
+yield_expr: 'yield' [yield_arg]
+yield_arg: 'from' test | testlist
diff --git a/Include/Python-ast.h b/Include/Python-ast.h
index 0ad788b..7ad6cb3 100644
--- a/Include/Python-ast.h
+++ b/Include/Python-ast.h
@@ -36,6 +36,8 @@ typedef struct _keyword *keyword_ty;
typedef struct _alias *alias_ty;
+typedef struct _withitem *withitem_ty;
+
enum _mod_kind {Module_kind=1, Interactive_kind=2, Expression_kind=3,
Suite_kind=4};
@@ -64,10 +66,9 @@ struct _mod {
enum _stmt_kind {FunctionDef_kind=1, ClassDef_kind=2, Return_kind=3,
Delete_kind=4, Assign_kind=5, AugAssign_kind=6, For_kind=7,
While_kind=8, If_kind=9, With_kind=10, Raise_kind=11,
- TryExcept_kind=12, TryFinally_kind=13, Assert_kind=14,
- Import_kind=15, ImportFrom_kind=16, Global_kind=17,
- Nonlocal_kind=18, Expr_kind=19, Pass_kind=20, Break_kind=21,
- Continue_kind=22};
+ Try_kind=12, Assert_kind=13, Import_kind=14,
+ ImportFrom_kind=15, Global_kind=16, Nonlocal_kind=17,
+ Expr_kind=18, Pass_kind=19, Break_kind=20, Continue_kind=21};
struct _stmt {
enum _stmt_kind kind;
union {
@@ -128,8 +129,7 @@ struct _stmt {
} If;
struct {
- expr_ty context_expr;
- expr_ty optional_vars;
+ asdl_seq *items;
asdl_seq *body;
} With;
@@ -142,12 +142,8 @@ struct _stmt {
asdl_seq *body;
asdl_seq *handlers;
asdl_seq *orelse;
- } TryExcept;
-
- struct {
- asdl_seq *body;
asdl_seq *finalbody;
- } TryFinally;
+ } Try;
struct {
expr_ty test;
@@ -184,10 +180,10 @@ struct _stmt {
enum _expr_kind {BoolOp_kind=1, BinOp_kind=2, UnaryOp_kind=3, Lambda_kind=4,
IfExp_kind=5, Dict_kind=6, Set_kind=7, ListComp_kind=8,
SetComp_kind=9, DictComp_kind=10, GeneratorExp_kind=11,
- Yield_kind=12, Compare_kind=13, Call_kind=14, Num_kind=15,
- Str_kind=16, Bytes_kind=17, Ellipsis_kind=18,
- Attribute_kind=19, Subscript_kind=20, Starred_kind=21,
- Name_kind=22, List_kind=23, Tuple_kind=24};
+ Yield_kind=12, YieldFrom_kind=13, Compare_kind=14,
+ Call_kind=15, Num_kind=16, Str_kind=17, Bytes_kind=18,
+ Ellipsis_kind=19, Attribute_kind=20, Subscript_kind=21,
+ Starred_kind=22, Name_kind=23, List_kind=24, Tuple_kind=25};
struct _expr {
enum _expr_kind kind;
union {
@@ -253,6 +249,10 @@ struct _expr {
} Yield;
struct {
+ expr_ty value;
+ } YieldFrom;
+
+ struct {
expr_ty left;
asdl_int_seq *ops;
asdl_seq *comparators;
@@ -275,7 +275,7 @@ struct _expr {
} Str;
struct {
- string s;
+ bytes s;
} Bytes;
struct {
@@ -383,6 +383,11 @@ struct _alias {
identifier asname;
};
+struct _withitem {
+ expr_ty context_expr;
+ expr_ty optional_vars;
+};
+
#define Module(a0, a1) _Py_Module(a0, a1)
mod_ty _Py_Module(asdl_seq * body, PyArena *arena);
@@ -421,18 +426,16 @@ stmt_ty _Py_While(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno,
#define If(a0, a1, a2, a3, a4, a5) _Py_If(a0, a1, a2, a3, a4, a5)
stmt_ty _Py_If(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno,
int col_offset, PyArena *arena);
-#define With(a0, a1, a2, a3, a4, a5) _Py_With(a0, a1, a2, a3, a4, a5)
-stmt_ty _Py_With(expr_ty context_expr, expr_ty optional_vars, asdl_seq * body,
- int lineno, int col_offset, PyArena *arena);
+#define With(a0, a1, a2, a3, a4) _Py_With(a0, a1, a2, a3, a4)
+stmt_ty _Py_With(asdl_seq * items, asdl_seq * body, int lineno, int col_offset,
+ PyArena *arena);
#define Raise(a0, a1, a2, a3, a4) _Py_Raise(a0, a1, a2, a3, a4)
stmt_ty _Py_Raise(expr_ty exc, expr_ty cause, int lineno, int col_offset,
PyArena *arena);
-#define TryExcept(a0, a1, a2, a3, a4, a5) _Py_TryExcept(a0, a1, a2, a3, a4, a5)
-stmt_ty _Py_TryExcept(asdl_seq * body, asdl_seq * handlers, asdl_seq * orelse,
- int lineno, int col_offset, PyArena *arena);
-#define TryFinally(a0, a1, a2, a3, a4) _Py_TryFinally(a0, a1, a2, a3, a4)
-stmt_ty _Py_TryFinally(asdl_seq * body, asdl_seq * finalbody, int lineno, int
- col_offset, PyArena *arena);
+#define Try(a0, a1, a2, a3, a4, a5, a6) _Py_Try(a0, a1, a2, a3, a4, a5, a6)
+stmt_ty _Py_Try(asdl_seq * body, asdl_seq * handlers, asdl_seq * orelse,
+ asdl_seq * finalbody, int lineno, int col_offset, PyArena
+ *arena);
#define Assert(a0, a1, a2, a3, a4) _Py_Assert(a0, a1, a2, a3, a4)
stmt_ty _Py_Assert(expr_ty test, expr_ty msg, int lineno, int col_offset,
PyArena *arena);
@@ -490,6 +493,9 @@ expr_ty _Py_GeneratorExp(expr_ty elt, asdl_seq * generators, int lineno, int
col_offset, PyArena *arena);
#define Yield(a0, a1, a2, a3) _Py_Yield(a0, a1, a2, a3)
expr_ty _Py_Yield(expr_ty value, int lineno, int col_offset, PyArena *arena);
+#define YieldFrom(a0, a1, a2, a3) _Py_YieldFrom(a0, a1, a2, a3)
+expr_ty _Py_YieldFrom(expr_ty value, int lineno, int col_offset, PyArena
+ *arena);
#define Compare(a0, a1, a2, a3, a4, a5) _Py_Compare(a0, a1, a2, a3, a4, a5)
expr_ty _Py_Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators,
int lineno, int col_offset, PyArena *arena);
@@ -502,7 +508,7 @@ expr_ty _Py_Num(object n, int lineno, int col_offset, PyArena *arena);
#define Str(a0, a1, a2, a3) _Py_Str(a0, a1, a2, a3)
expr_ty _Py_Str(string s, int lineno, int col_offset, PyArena *arena);
#define Bytes(a0, a1, a2, a3) _Py_Bytes(a0, a1, a2, a3)
-expr_ty _Py_Bytes(string s, int lineno, int col_offset, PyArena *arena);
+expr_ty _Py_Bytes(bytes s, int lineno, int col_offset, PyArena *arena);
#define Ellipsis(a0, a1, a2) _Py_Ellipsis(a0, a1, a2)
expr_ty _Py_Ellipsis(int lineno, int col_offset, PyArena *arena);
#define Attribute(a0, a1, a2, a3, a4, a5) _Py_Attribute(a0, a1, a2, a3, a4, a5)
@@ -547,6 +553,9 @@ arg_ty _Py_arg(identifier arg, expr_ty annotation, PyArena *arena);
keyword_ty _Py_keyword(identifier arg, expr_ty value, PyArena *arena);
#define alias(a0, a1, a2) _Py_alias(a0, a1, a2)
alias_ty _Py_alias(identifier name, identifier asname, PyArena *arena);
+#define withitem(a0, a1, a2) _Py_withitem(a0, a1, a2)
+withitem_ty _Py_withitem(expr_ty context_expr, expr_ty optional_vars, PyArena
+ *arena);
PyObject* PyAST_mod2obj(mod_ty t);
mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);
diff --git a/Include/Python.h b/Include/Python.h
index d6e47c2..a78a721 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -48,6 +48,7 @@
#include <assert.h>
#include "pyport.h"
+#include "pymacro.h"
#include "pyatomic.h"
@@ -100,6 +101,7 @@
#include "warnings.h"
#include "weakrefobject.h"
#include "structseq.h"
+#include "namespaceobject.h"
#include "codecs.h"
#include "pyerrors.h"
@@ -125,43 +127,6 @@
#include "pystrcmp.h"
#include "dtoa.h"
#include "fileutils.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* _Py_Mangle is defined in compile.c */
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-/* Argument must be a char or an int in [-128, 127] or [0, 255]. */
-#define Py_CHARMASK(c) ((unsigned char)((c) & 0xff))
-
#include "pyfpe.h"
-/* These definitions must match corresponding definitions in graminit.h.
- There's code in compile.c that checks that they are the same. */
-#define Py_single_input 256
-#define Py_file_input 257
-#define Py_eval_input 258
-
-#ifdef HAVE_PTH
-/* GNU pth user-space thread support */
-#include <pth.h>
-#endif
-
-/* Define macros for inline documentation. */
-#define PyDoc_VAR(name) static char name[]
-#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
-#ifdef WITH_DOC_STRINGS
-#define PyDoc_STR(str) str
-#else
-#define PyDoc_STR(str) ""
-#endif
-
#endif /* !Py_PYTHON_H */
diff --git a/Include/abstract.h b/Include/abstract.h
index 0fe0956..44b5af7 100644
--- a/Include/abstract.h
+++ b/Include/abstract.h
@@ -7,6 +7,7 @@ extern "C" {
#ifdef PY_SSIZE_T_CLEAN
#define PyObject_CallFunction _PyObject_CallFunction_SizeT
#define PyObject_CallMethod _PyObject_CallMethod_SizeT
+#define _PyObject_CallMethodId _PyObject_CallMethodId_SizeT
#endif
/* Abstract Object Interface (many thanks to Jim Fulton) */
@@ -307,11 +308,22 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
Python expression: o.method(args).
*/
+ PyAPI_FUNC(PyObject *) _PyObject_CallMethodId(PyObject *o, _Py_Identifier *method,
+ char *format, ...);
+
+ /*
+ Like PyObject_CallMethod, but expect a _Py_Identifier* as the
+ method name.
+ */
+
PyAPI_FUNC(PyObject *) _PyObject_CallFunction_SizeT(PyObject *callable,
char *format, ...);
PyAPI_FUNC(PyObject *) _PyObject_CallMethod_SizeT(PyObject *o,
char *name,
char *format, ...);
+ PyAPI_FUNC(PyObject *) _PyObject_CallMethodId_SizeT(PyObject *o,
+ _Py_Identifier *name,
+ char *format, ...);
PyAPI_FUNC(PyObject *) PyObject_CallFunctionObjArgs(PyObject *callable,
...);
@@ -327,6 +339,10 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
PyAPI_FUNC(PyObject *) PyObject_CallMethodObjArgs(PyObject *o,
PyObject *method, ...);
+ PyAPI_FUNC(PyObject *) _PyObject_CallMethodObjIdArgs(PyObject *o,
+ struct _Py_Identifier *method,
+ ...);
+
/*
Call the method named m of object o with a variable number of
@@ -519,11 +535,12 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
+ /* Implementation in memoryobject.c */
PyAPI_FUNC(int) PyBuffer_ToContiguous(void *buf, Py_buffer *view,
- Py_ssize_t len, char fort);
+ Py_ssize_t len, char order);
PyAPI_FUNC(int) PyBuffer_FromContiguous(Py_buffer *view, void *buf,
- Py_ssize_t len, char fort);
+ Py_ssize_t len, char order);
/* Copy len bytes of data from the contiguous chunk of memory
@@ -547,7 +564,7 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
/* Copy the data from the src buffer to the buffer of destination
*/
- PyAPI_FUNC(int) PyBuffer_IsContiguous(Py_buffer *view, char fort);
+ PyAPI_FUNC(int) PyBuffer_IsContiguous(const Py_buffer *view, char fort);
PyAPI_FUNC(void) PyBuffer_FillContiguousStrides(int ndims,
@@ -761,21 +778,6 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
PyAPI_FUNC(Py_ssize_t) PyNumber_AsSsize_t(PyObject *o, PyObject *exc);
/*
- Returns the Integral instance converted to an int. The
- instance is expected to be int or long or have an __int__
- method. Steals integral's reference. error_format will be
- used to create the TypeError if integral isn't actually an
- Integral instance. error_format should be a format string
- that can accept a char* naming integral's type.
- */
-
-#ifndef Py_LIMITED_API
- PyAPI_FUNC(PyObject *) _PyNumber_ConvertIntegralToInt(
- PyObject *integral,
- const char* error_format);
-#endif
-
- /*
Returns the object converted to Py_ssize_t by going through
PyNumber_Index first. If an overflow error occurs while
converting the int-or-long to Py_ssize_t, then the second argument
@@ -1014,7 +1016,7 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
PyAPI_FUNC(PyObject *) PySequence_Fast(PyObject *o, const char* m);
/*
- Returns the sequence, o, as a tuple, unless it's already a
+ Returns the sequence, o, as a list, unless it's already a
tuple or list. Use PySequence_Fast_GET_ITEM to access the
members of this list, and PySequence_Fast_GET_SIZE to get its length.
diff --git a/Include/asdl.h b/Include/asdl.h
index 9bb0697..6bf618f 100644
--- a/Include/asdl.h
+++ b/Include/asdl.h
@@ -3,6 +3,7 @@
typedef PyObject * identifier;
typedef PyObject * string;
+typedef PyObject * bytes;
typedef PyObject * object;
/* It would be nice if the code generated by asdl_c.py was completely
@@ -14,17 +15,17 @@ typedef PyObject * object;
/* XXX A sequence should be typed so that its use can be typechecked. */
typedef struct {
- int size;
+ Py_ssize_t size;
void *elements[1];
} asdl_seq;
typedef struct {
- int size;
+ Py_ssize_t size;
int elements[1];
} asdl_int_seq;
-asdl_seq *asdl_seq_new(int size, PyArena *arena);
-asdl_int_seq *asdl_int_seq_new(int size, PyArena *arena);
+asdl_seq *asdl_seq_new(Py_ssize_t size, PyArena *arena);
+asdl_int_seq *asdl_int_seq_new(Py_ssize_t size, PyArena *arena);
#define asdl_seq_GET(S, I) (S)->elements[(I)]
#define asdl_seq_LEN(S) ((S) == NULL ? 0 : (S)->size)
diff --git a/Include/ast.h b/Include/ast.h
index a015336..055e8dc 100644
--- a/Include/ast.h
+++ b/Include/ast.h
@@ -4,6 +4,7 @@
extern "C" {
#endif
+PyAPI_FUNC(int) PyAST_Validate(mod_ty);
PyAPI_FUNC(mod_ty) PyAST_FromNode(
const node *n,
PyCompilerFlags *flags,
diff --git a/Include/bytesobject.h b/Include/bytesobject.h
index e1af89f..d7c7ffd 100644
--- a/Include/bytesobject.h
+++ b/Include/bytesobject.h
@@ -62,8 +62,6 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *);
PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *);
#ifndef Py_LIMITED_API
PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t);
-PyAPI_FUNC(PyObject *) _PyBytes_FormatLong(PyObject*, int, int,
- int, char**, int*);
#endif
PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, Py_ssize_t,
diff --git a/Include/code.h b/Include/code.h
index e773b6a..7c7e5bf 100644
--- a/Include/code.h
+++ b/Include/code.h
@@ -22,6 +22,7 @@ typedef struct {
PyObject *co_freevars; /* tuple of strings (free variable names) */
PyObject *co_cellvars; /* tuple of strings (cell variable names) */
/* The rest doesn't count for hash or comparisons */
+ unsigned char *co_cell2arg; /* Maps cell vars which are arguments. */
PyObject *co_filename; /* unicode (where it was loaded from) */
PyObject *co_name; /* unicode (name, for reference) */
int co_firstlineno; /* first source line number */
@@ -57,6 +58,11 @@ typedef struct {
#define CO_FUTURE_BARRY_AS_BDFL 0x40000
+/* This value is found in the co_cell2arg array when the associated cell
+ variable does not correspond to an argument. The maximum number of
+ arguments is 255 (indexed up to 254), so 255 works as a special flag. */
+#define CO_CELL_NOT_AN_ARG 255
+
/* This should be defined if a future statement modifies the syntax.
For example, when a keyword is added.
*/
diff --git a/Include/codecs.h b/Include/codecs.h
index dff09e7..0d9e9b4 100644
--- a/Include/codecs.h
+++ b/Include/codecs.h
@@ -174,6 +174,8 @@ PyAPI_FUNC(PyObject *) PyCodec_XMLCharRefReplaceErrors(PyObject *exc);
/* replace the unicode encode error with backslash escapes (\x, \u and \U) */
PyAPI_FUNC(PyObject *) PyCodec_BackslashReplaceErrors(PyObject *exc);
+PyAPI_DATA(const char *) Py_hexdigits;
+
#ifdef __cplusplus
}
#endif
diff --git a/Include/compile.h b/Include/compile.h
index bc53b39..ac2636d 100644
--- a/Include/compile.h
+++ b/Include/compile.h
@@ -1,7 +1,7 @@
-#ifndef Py_LIMITED_API
#ifndef Py_COMPILE_H
#define Py_COMPILE_H
+#ifndef Py_LIMITED_API
#include "code.h"
#ifdef __cplusplus
@@ -38,9 +38,19 @@ PyAPI_FUNC(PyCodeObject *) PyAST_CompileEx(
PyArena *arena);
PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST(struct _mod *, const char *);
+/* _Py_Mangle is defined in compile.c */
+PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
#ifdef __cplusplus
}
#endif
-#endif /* !Py_COMPILE_H */
+
#endif /* !Py_LIMITED_API */
+
+/* These definitions must match corresponding definitions in graminit.h.
+ There's code in compile.c that checks that they are the same. */
+#define Py_single_input 256
+#define Py_file_input 257
+#define Py_eval_input 258
+
+#endif /* !Py_COMPILE_H */
diff --git a/Include/complexobject.h b/Include/complexobject.h
index c379b08..1934f3b 100644
--- a/Include/complexobject.h
+++ b/Include/complexobject.h
@@ -63,9 +63,12 @@ PyAPI_FUNC(Py_complex) PyComplex_AsCComplex(PyObject *op);
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj,
- Py_UNICODE *format_spec,
- Py_ssize_t format_spec_len);
+PyAPI_FUNC(int) _PyComplex_FormatAdvancedWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ PyObject *format_spec,
+ Py_ssize_t start,
+ Py_ssize_t end);
#endif
#ifdef __cplusplus
diff --git a/Include/datetime.h b/Include/datetime.h
index db57a18..41e3bcf 100644
--- a/Include/datetime.h
+++ b/Include/datetime.h
@@ -135,6 +135,12 @@ typedef struct
(((PyDateTime_Time*)o)->data[4] << 8) | \
((PyDateTime_Time*)o)->data[5])
+/* Applies to timedelta instances */
+#define PyDateTime_DELTA_GET_DAYS(o) (((PyDateTime_Delta*)o)->days)
+#define PyDateTime_DELTA_GET_SECONDS(o) (((PyDateTime_Delta*)o)->seconds)
+#define PyDateTime_DELTA_GET_MICROSECONDS(o) \
+ (((PyDateTime_Delta*)o)->microseconds)
+
/* Define structure for C API. */
typedef struct {
diff --git a/Include/descrobject.h b/Include/descrobject.h
index 646b3cc..e2ba97f 100644
--- a/Include/descrobject.h
+++ b/Include/descrobject.h
@@ -42,6 +42,7 @@ typedef struct {
PyObject_HEAD
PyTypeObject *d_type;
PyObject *d_name;
+ PyObject *d_qualname;
} PyDescrObject;
#define PyDescr_COMMON PyDescrObject d_common
diff --git a/Include/dictobject.h b/Include/dictobject.h
index b026785..d89aac8 100644
--- a/Include/dictobject.h
+++ b/Include/dictobject.h
@@ -13,78 +13,20 @@ extern "C" {
tuning dictionaries, and several ideas for possible optimizations.
*/
-/*
-There are three kinds of slots in the table:
-
-1. Unused. me_key == me_value == NULL
- Does not hold an active (key, value) pair now and never did. Unused can
- transition to Active upon key insertion. This is the only case in which
- me_key is NULL, and is each slot's initial state.
-
-2. Active. me_key != NULL and me_key != dummy and me_value != NULL
- Holds an active (key, value) pair. Active can transition to Dummy upon
- key deletion. This is the only case in which me_value != NULL.
-
-3. Dummy. me_key == dummy and me_value == NULL
- Previously held an active (key, value) pair, but that was deleted and an
- active pair has not yet overwritten the slot. Dummy can transition to
- Active upon key insertion. Dummy slots cannot be made Unused again
- (cannot have me_key set to NULL), else the probe sequence in case of
- collision would have no way to know they were once active.
-
-Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
-hold a search finger. The me_hash field of Unused or Dummy slots has no
-meaning otherwise.
-*/
-
-/* PyDict_MINSIZE is the minimum size of a dictionary. This many slots are
- * allocated directly in the dict object (in the ma_smalltable member).
- * It must be a power of 2, and at least 4. 8 allows dicts with no more
- * than 5 active entries to live in ma_smalltable (and so avoid an
- * additional malloc); instrumentation suggested this suffices for the
- * majority of dicts (consisting mostly of usually-small instance dicts and
- * usually-small dicts created to pass keyword arguments).
- */
#ifndef Py_LIMITED_API
-#define PyDict_MINSIZE 8
+typedef struct _dictkeysobject PyDictKeysObject;
+
+/* The ma_values pointer is NULL for a combined table
+ * or points to an array of PyObject* for a split table
+ */
typedef struct {
- /* Cached hash code of me_key. */
- Py_hash_t me_hash;
- PyObject *me_key;
- PyObject *me_value;
-} PyDictEntry;
-
-/*
-To ensure the lookup algorithm terminates, there must be at least one Unused
-slot (NULL key) in the table.
-The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
-ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
-values == the number of Active items).
-To avoid slowing down lookups on a near-full table, we resize the table when
-it's two-thirds full.
-*/
-typedef struct _dictobject PyDictObject;
-struct _dictobject {
PyObject_HEAD
- Py_ssize_t ma_fill; /* # Active + # Dummy */
- Py_ssize_t ma_used; /* # Active */
-
- /* The table contains ma_mask + 1 slots, and that's a power of 2.
- * We store the mask instead of the size because the mask is more
- * frequently needed.
- */
- Py_ssize_t ma_mask;
-
- /* ma_table points to ma_smalltable for small tables, else to
- * additional malloc'ed memory. ma_table is never NULL! This rule
- * saves repeated runtime null-tests in the workhorse getitem and
- * setitem calls.
- */
- PyDictEntry *ma_table;
- PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, Py_hash_t hash);
- PyDictEntry ma_smalltable[PyDict_MINSIZE];
-};
+ Py_ssize_t ma_used;
+ PyDictKeysObject *ma_keys;
+ PyObject **ma_values;
+} PyDictObject;
+
#endif /* Py_LIMITED_API */
PyAPI_DATA(PyTypeObject) PyDict_Type;
@@ -109,12 +51,16 @@ PyAPI_DATA(PyTypeObject) PyDictValues_Type;
PyAPI_FUNC(PyObject *) PyDict_New(void);
PyAPI_FUNC(PyObject *) PyDict_GetItem(PyObject *mp, PyObject *key);
PyAPI_FUNC(PyObject *) PyDict_GetItemWithError(PyObject *mp, PyObject *key);
+PyAPI_FUNC(PyObject *) _PyDict_GetItemIdWithError(PyObject *dp,
+ struct _Py_Identifier *key);
PyAPI_FUNC(int) PyDict_SetItem(PyObject *mp, PyObject *key, PyObject *item);
PyAPI_FUNC(int) PyDict_DelItem(PyObject *mp, PyObject *key);
PyAPI_FUNC(void) PyDict_Clear(PyObject *mp);
PyAPI_FUNC(int) PyDict_Next(
PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value);
#ifndef Py_LIMITED_API
+PyDictKeysObject *_PyDict_NewKeysForClass(void);
+PyAPI_FUNC(PyObject *) PyObject_GenericGetDict(PyObject *, void *);
PyAPI_FUNC(int) _PyDict_Next(
PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, Py_hash_t *hash);
#endif
@@ -129,6 +75,10 @@ PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, Py_hash_t hash);
PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);
PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp);
PyAPI_FUNC(int) _PyDict_HasOnlyStringKeys(PyObject *mp);
+Py_ssize_t _PyDict_KeysSize(PyDictKeysObject *keys);
+#define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL)
+
+PyAPI_FUNC(int) PyDict_ClearFreeList(void);
#endif
/* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */
@@ -153,9 +103,17 @@ PyAPI_FUNC(int) PyDict_MergeFromSeq2(PyObject *d,
int override);
PyAPI_FUNC(PyObject *) PyDict_GetItemString(PyObject *dp, const char *key);
+PyAPI_FUNC(PyObject *) _PyDict_GetItemId(PyObject *dp, struct _Py_Identifier *key);
PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *item);
+PyAPI_FUNC(int) _PyDict_SetItemId(PyObject *dp, struct _Py_Identifier *key, PyObject *item);
PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key);
+#ifndef Py_LIMITED_API
+int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
+PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
+PyAPI_FUNC(void) _PyDict_DebugMallocStats(FILE *out);
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/Include/dtoa.h b/Include/dtoa.h
index 819bd0f..9bfb625 100644
--- a/Include/dtoa.h
+++ b/Include/dtoa.h
@@ -8,6 +8,8 @@ PyAPI_FUNC(double) _Py_dg_strtod(const char *str, char **ptr);
PyAPI_FUNC(char *) _Py_dg_dtoa(double d, int mode, int ndigits,
int *decpt, int *sign, char **rve);
PyAPI_FUNC(void) _Py_dg_freedtoa(char *s);
+PyAPI_FUNC(double) _Py_dg_stdnan(int sign);
+PyAPI_FUNC(double) _Py_dg_infinity(int sign);
#ifdef __cplusplus
diff --git a/Include/errcode.h b/Include/errcode.h
index 6bb3cc1..5946686 100644
--- a/Include/errcode.h
+++ b/Include/errcode.h
@@ -30,6 +30,7 @@ extern "C" {
#define E_EOLS 24 /* EOL in single-quoted string */
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
#define E_IDENTIFIER 26 /* Invalid characters in identifier */
+#define E_BADSINGLE 27 /* Ill-formed single statement input */
#ifdef __cplusplus
}
diff --git a/Include/fileutils.h b/Include/fileutils.h
index 2fade9b..7c18cf2 100644
--- a/Include/fileutils.h
+++ b/Include/fileutils.h
@@ -5,6 +5,8 @@
extern "C" {
#endif
+PyAPI_FUNC(PyObject *) _Py_device_encoding(int);
+
PyAPI_FUNC(wchar_t *) _Py_char2wchar(
const char *arg,
size_t *size);
diff --git a/Include/floatobject.h b/Include/floatobject.h
index 90f0a45..e240fdb 100644
--- a/Include/floatobject.h
+++ b/Include/floatobject.h
@@ -27,12 +27,12 @@ PyAPI_DATA(PyTypeObject) PyFloat_Type;
#define Py_RETURN_NAN return PyFloat_FromDouble(Py_NAN)
#endif
-#define Py_RETURN_INF(sign) do \
- if (copysign(1., sign) == 1.) { \
- return PyFloat_FromDouble(Py_HUGE_VAL); \
- } else { \
- return PyFloat_FromDouble(-Py_HUGE_VAL); \
- } while(0)
+#define Py_RETURN_INF(sign) do \
+ if (copysign(1., sign) == 1.) { \
+ return PyFloat_FromDouble(Py_HUGE_VAL); \
+ } else { \
+ return PyFloat_FromDouble(-Py_HUGE_VAL); \
+ } while(0)
PyAPI_FUNC(double) PyFloat_GetMax(void);
PyAPI_FUNC(double) PyFloat_GetMin(void);
@@ -110,11 +110,16 @@ PyAPI_FUNC(double) _PyFloat_Unpack8(const unsigned char *p, int le);
/* free list api */
PyAPI_FUNC(int) PyFloat_ClearFreeList(void);
+PyAPI_FUNC(void) _PyFloat_DebugMallocStats(FILE* out);
+
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
-PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj,
- Py_UNICODE *format_spec,
- Py_ssize_t format_spec_len);
+PyAPI_FUNC(int) _PyFloat_FormatAdvancedWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ PyObject *format_spec,
+ Py_ssize_t start,
+ Py_ssize_t end);
#endif /* Py_LIMITED_API */
#ifdef __cplusplus
diff --git a/Include/frameobject.h b/Include/frameobject.h
index 1fb64bb..ac2f790 100644
--- a/Include/frameobject.h
+++ b/Include/frameobject.h
@@ -9,45 +9,45 @@ extern "C" {
#endif
typedef struct {
- int b_type; /* what kind of block this is */
- int b_handler; /* where to jump to find handler */
- int b_level; /* value stack level to pop to */
+ int b_type; /* what kind of block this is */
+ int b_handler; /* where to jump to find handler */
+ int b_level; /* value stack level to pop to */
} PyTryBlock;
typedef struct _frame {
PyObject_VAR_HEAD
- struct _frame *f_back; /* previous frame, or NULL */
- PyCodeObject *f_code; /* code segment */
- PyObject *f_builtins; /* builtin symbol table (PyDictObject) */
- PyObject *f_globals; /* global symbol table (PyDictObject) */
- PyObject *f_locals; /* local symbol table (any mapping) */
- PyObject **f_valuestack; /* points after the last local */
+ struct _frame *f_back; /* previous frame, or NULL */
+ PyCodeObject *f_code; /* code segment */
+ PyObject *f_builtins; /* builtin symbol table (PyDictObject) */
+ PyObject *f_globals; /* global symbol table (PyDictObject) */
+ PyObject *f_locals; /* local symbol table (any mapping) */
+ PyObject **f_valuestack; /* points after the last local */
/* Next free slot in f_valuestack. Frame creation sets to f_valuestack.
Frame evaluation usually NULLs it, but a frame that yields sets it
to the current stack top. */
PyObject **f_stacktop;
- PyObject *f_trace; /* Trace function */
-
- /* In a generator, we need to be able to swap between the exception
- state inside the generator and the exception state of the calling
- frame (which shouldn't be impacted when the generator "yields"
- from an except handler).
- These three fields exist exactly for that, and are unused for
- non-generator frames. See the SAVE_EXC_STATE and SWAP_EXC_STATE
- macros in ceval.c for details of their use. */
+ PyObject *f_trace; /* Trace function */
+
+ /* In a generator, we need to be able to swap between the exception
+ state inside the generator and the exception state of the calling
+ frame (which shouldn't be impacted when the generator "yields"
+ from an except handler).
+ These three fields exist exactly for that, and are unused for
+ non-generator frames. See the SAVE_EXC_STATE and SWAP_EXC_STATE
+ macros in ceval.c for details of their use. */
PyObject *f_exc_type, *f_exc_value, *f_exc_traceback;
PyThreadState *f_tstate;
- int f_lasti; /* Last instruction if called */
+ int f_lasti; /* Last instruction if called */
/* Call PyFrame_GetLineNumber() instead of reading this field
directly. As of 2.3 f_lineno is only valid when tracing is
active (i.e. when f_trace is set). At other times we use
PyCode_Addr2Line to calculate the line from the current
bytecode index. */
- int f_lineno; /* Current line number */
- int f_iblock; /* index in f_blockstack */
+ int f_lineno; /* Current line number */
+ int f_iblock; /* index in f_blockstack */
PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
- PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */
+ PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */
} PyFrameObject;
@@ -79,6 +79,8 @@ PyAPI_FUNC(void) PyFrame_FastToLocals(PyFrameObject *);
PyAPI_FUNC(int) PyFrame_ClearFreeList(void);
+PyAPI_FUNC(void) _PyFrame_DebugMallocStats(FILE *out);
+
/* Return the line of code the frame is currently executing. */
PyAPI_FUNC(int) PyFrame_GetLineNumber(PyFrameObject *);
diff --git a/Include/funcobject.h b/Include/funcobject.h
index 521d87b..cc1426c 100644
--- a/Include/funcobject.h
+++ b/Include/funcobject.h
@@ -31,6 +31,7 @@ typedef struct {
PyObject *func_weakreflist; /* List of weak references */
PyObject *func_module; /* The __module__ attribute, can be anything */
PyObject *func_annotations; /* Annotations, a dict or NULL */
+ PyObject *func_qualname; /* The qualified name */
/* Invariant:
* func_closure contains the bindings for func_code->co_freevars, so
@@ -44,6 +45,7 @@ PyAPI_DATA(PyTypeObject) PyFunction_Type;
#define PyFunction_Check(op) (Py_TYPE(op) == &PyFunction_Type)
PyAPI_FUNC(PyObject *) PyFunction_New(PyObject *, PyObject *);
+PyAPI_FUNC(PyObject *) PyFunction_NewWithQualName(PyObject *, PyObject *, PyObject *);
PyAPI_FUNC(PyObject *) PyFunction_GetCode(PyObject *);
PyAPI_FUNC(PyObject *) PyFunction_GetGlobals(PyObject *);
PyAPI_FUNC(PyObject *) PyFunction_GetModule(PyObject *);
diff --git a/Include/genobject.h b/Include/genobject.h
index d29fb1e..ed451ba 100644
--- a/Include/genobject.h
+++ b/Include/genobject.h
@@ -11,20 +11,20 @@ extern "C" {
struct _frame; /* Avoid including frameobject.h */
typedef struct {
- PyObject_HEAD
- /* The gi_ prefix is intended to remind of generator-iterator. */
+ PyObject_HEAD
+ /* The gi_ prefix is intended to remind of generator-iterator. */
- /* Note: gi_frame can be NULL if the generator is "finished" */
- struct _frame *gi_frame;
+ /* Note: gi_frame can be NULL if the generator is "finished" */
+ struct _frame *gi_frame;
- /* True if generator is being executed. */
- int gi_running;
-
- /* The code object backing the generator */
- PyObject *gi_code;
+ /* True if generator is being executed. */
+ char gi_running;
- /* List of weak reference. */
- PyObject *gi_weakreflist;
+ /* The code object backing the generator */
+ PyObject *gi_code;
+
+ /* List of weak reference. */
+ PyObject *gi_weakreflist;
} PyGenObject;
PyAPI_DATA(PyTypeObject) PyGen_Type;
@@ -34,6 +34,8 @@ PyAPI_DATA(PyTypeObject) PyGen_Type;
PyAPI_FUNC(PyObject *) PyGen_New(struct _frame *);
PyAPI_FUNC(int) PyGen_NeedsFinalizing(PyGenObject *);
+PyAPI_FUNC(int) _PyGen_FetchStopIterationValue(PyObject **);
+PyObject *_PyGen_Send(PyGenObject *, PyObject *);
#ifdef __cplusplus
}
diff --git a/Include/graminit.h b/Include/graminit.h
index e0e27f9..3ec949a 100644
--- a/Include/graminit.h
+++ b/Include/graminit.h
@@ -81,3 +81,4 @@
#define comp_if 334
#define encoding_decl 335
#define yield_expr 336
+#define yield_arg 337
diff --git a/Include/import.h b/Include/import.h
index 400e97c..fdc2733 100644
--- a/Include/import.h
+++ b/Include/import.h
@@ -7,6 +7,9 @@
extern "C" {
#endif
+PyAPI_FUNC(void) _PyImportZip_Init(void);
+
+PyMODINIT_FUNC PyInit_imp(void);
PyAPI_FUNC(long) PyImport_GetMagicNumber(void);
PyAPI_FUNC(const char *) PyImport_GetMagicTag(void);
PyAPI_FUNC(PyObject *) PyImport_ExecCodeModule(
@@ -24,7 +27,16 @@ PyAPI_FUNC(PyObject *) PyImport_ExecCodeModuleWithPathnames(
char *pathname, /* decoded from the filesystem encoding */
char *cpathname /* decoded from the filesystem encoding */
);
+PyAPI_FUNC(PyObject *) PyImport_ExecCodeModuleObject(
+ PyObject *name,
+ PyObject *co,
+ PyObject *pathname,
+ PyObject *cpathname
+ );
PyAPI_FUNC(PyObject *) PyImport_GetModuleDict(void);
+PyAPI_FUNC(PyObject *) PyImport_AddModuleObject(
+ PyObject *name
+ );
PyAPI_FUNC(PyObject *) PyImport_AddModule(
const char *name /* UTF-8 encoded string */
);
@@ -35,7 +47,14 @@ PyAPI_FUNC(PyObject *) PyImport_ImportModuleNoBlock(
const char *name /* UTF-8 encoded string */
);
PyAPI_FUNC(PyObject *) PyImport_ImportModuleLevel(
- char *name, /* UTF-8 encoded string */
+ const char *name, /* UTF-8 encoded string */
+ PyObject *globals,
+ PyObject *locals,
+ PyObject *fromlist,
+ int level
+ );
+PyAPI_FUNC(PyObject *) PyImport_ImportModuleLevelObject(
+ PyObject *name,
PyObject *globals,
PyObject *locals,
PyObject *fromlist,
@@ -43,12 +62,15 @@ PyAPI_FUNC(PyObject *) PyImport_ImportModuleLevel(
);
#define PyImport_ImportModuleEx(n, g, l, f) \
- PyImport_ImportModuleLevel(n, g, l, f, -1)
+ PyImport_ImportModuleLevel(n, g, l, f, 0)
PyAPI_FUNC(PyObject *) PyImport_GetImporter(PyObject *path);
PyAPI_FUNC(PyObject *) PyImport_Import(PyObject *name);
PyAPI_FUNC(PyObject *) PyImport_ReloadModule(PyObject *m);
PyAPI_FUNC(void) PyImport_Cleanup(void);
+PyAPI_FUNC(int) PyImport_ImportFrozenModuleObject(
+ PyObject *name
+ );
PyAPI_FUNC(int) PyImport_ImportFrozenModule(
char *name /* UTF-8 encoded string */
);
@@ -65,17 +87,17 @@ PyAPI_FUNC(int) _PyImport_ReleaseLock(void);
PyAPI_FUNC(void) _PyImport_ReInitLock(void);
PyAPI_FUNC(PyObject *)_PyImport_FindBuiltin(
- char *name /* UTF-8 encoded string */
+ const char *name /* UTF-8 encoded string */
);
-PyAPI_FUNC(PyObject *)_PyImport_FindExtensionUnicode(char *, PyObject *);
+PyAPI_FUNC(PyObject *)_PyImport_FindExtensionObject(PyObject *, PyObject *);
PyAPI_FUNC(int)_PyImport_FixupBuiltin(
PyObject *mod,
char *name /* UTF-8 encoded string */
);
-PyAPI_FUNC(int)_PyImport_FixupExtensionUnicode(PyObject*, char *, PyObject *);
+PyAPI_FUNC(int)_PyImport_FixupExtensionObject(PyObject*, PyObject *, PyObject *);
struct _inittab {
- char *name;
+ char *name; /* ASCII encoded string */
PyObject* (*initfunc)(void);
};
PyAPI_DATA(struct _inittab *) PyImport_Inittab;
diff --git a/Include/intrcheck.h b/Include/intrcheck.h
index 3b67ed0..f53fee1 100644
--- a/Include/intrcheck.h
+++ b/Include/intrcheck.h
@@ -8,6 +8,12 @@ extern "C" {
PyAPI_FUNC(int) PyOS_InterruptOccurred(void);
PyAPI_FUNC(void) PyOS_InitInterrupts(void);
PyAPI_FUNC(void) PyOS_AfterFork(void);
+PyAPI_FUNC(int) _PyOS_IsMainThread(void);
+
+#ifdef MS_WINDOWS
+/* windows.h is not included by Python.h so use void* instead of HANDLE */
+PyAPI_FUNC(void*) _PyOS_SigintEvent(void);
+#endif
#ifdef __cplusplus
}
diff --git a/Include/listobject.h b/Include/listobject.h
index 949b1a3..dc62aee 100644
--- a/Include/listobject.h
+++ b/Include/listobject.h
@@ -62,6 +62,9 @@ PyAPI_FUNC(int) PyList_Reverse(PyObject *);
PyAPI_FUNC(PyObject *) PyList_AsTuple(PyObject *);
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) _PyList_Extend(PyListObject *, PyObject *);
+
+PyAPI_FUNC(int) PyList_ClearFreeList(void);
+PyAPI_FUNC(void) _PyList_DebugMallocStats(FILE *out);
#endif
/* Macro, trading safety for speed */
diff --git a/Include/longintrepr.h b/Include/longintrepr.h
index b94f7b2..3a45bad 100644
--- a/Include/longintrepr.h
+++ b/Include/longintrepr.h
@@ -6,7 +6,7 @@ extern "C" {
#endif
-/* This is published for the benefit of "friend" marshal.c only. */
+/* This is published for the benefit of "friends" marshal.c and _decimal.c. */
/* Parameters of the long integer representation. There are two different
sets of parameters: one set for 30-bit digits, stored in an unsigned 32-bit
diff --git a/Include/longobject.h b/Include/longobject.h
index c09565a..d741f1b 100644
--- a/Include/longobject.h
+++ b/Include/longobject.h
@@ -12,7 +12,7 @@ typedef struct _longobject PyLongObject; /* Revealed in longintrepr.h */
PyAPI_DATA(PyTypeObject) PyLong_Type;
#define PyLong_Check(op) \
- PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_LONG_SUBCLASS)
+ PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_LONG_SUBCLASS)
#define PyLong_CheckExact(op) (Py_TYPE(op) == &PyLong_Type)
PyAPI_FUNC(PyObject *) PyLong_FromLong(long);
@@ -80,6 +80,7 @@ PyAPI_FUNC(PY_LONG_LONG) PyLong_AsLongLongAndOverflow(PyObject *, int *);
PyAPI_FUNC(PyObject *) PyLong_FromString(char *, char **, int);
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int);
+PyAPI_FUNC(PyObject *) PyLong_FromUnicodeObject(PyObject *u, int base);
#endif
#ifndef Py_LIMITED_API
@@ -121,8 +122,8 @@ PyAPI_FUNC(PyObject *) _PyLong_DivmodNear(PyObject *, PyObject *);
enough memory to create the Python long.
*/
PyAPI_FUNC(PyObject *) _PyLong_FromByteArray(
- const unsigned char* bytes, size_t n,
- int little_endian, int is_signed);
+ const unsigned char* bytes, size_t n,
+ int little_endian, int is_signed);
/* _PyLong_AsByteArray: Convert the least-significant 8*n bits of long
v to a base-256 integer, stored in array bytes. Normally return 0,
@@ -144,19 +145,28 @@ PyAPI_FUNC(PyObject *) _PyLong_FromByteArray(
case, but bytes holds the least-signficant n bytes of the true value.
*/
PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v,
- unsigned char* bytes, size_t n,
- int little_endian, int is_signed);
+ unsigned char* bytes, size_t n,
+ int little_endian, int is_signed);
/* _PyLong_Format: Convert the long to a string object with given base,
appending a base prefix of 0[box] if base is 2, 8 or 16. */
-PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base);
+PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *obj, int base);
+
+PyAPI_FUNC(int) _PyLong_FormatWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ int base,
+ int alternate);
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
-PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj,
- Py_UNICODE *format_spec,
- Py_ssize_t format_spec_len);
+PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ PyObject *format_spec,
+ Py_ssize_t start,
+ Py_ssize_t end);
#endif /* Py_LIMITED_API */
/* These aren't really part of the long object, but they're handy. The
diff --git a/Include/memoryobject.h b/Include/memoryobject.h
index 62ecbd6..c2e1194 100644
--- a/Include/memoryobject.h
+++ b/Include/memoryobject.h
@@ -6,69 +6,65 @@
extern "C" {
#endif
+#ifndef Py_LIMITED_API
+PyAPI_DATA(PyTypeObject) _PyManagedBuffer_Type;
+#endif
PyAPI_DATA(PyTypeObject) PyMemoryView_Type;
#define PyMemoryView_Check(op) (Py_TYPE(op) == &PyMemoryView_Type)
#ifndef Py_LIMITED_API
-/* Get a pointer to the underlying Py_buffer of a memoryview object. */
+/* Get a pointer to the memoryview's private copy of the exporter's buffer. */
#define PyMemoryView_GET_BUFFER(op) (&((PyMemoryViewObject *)(op))->view)
-/* Get a pointer to the PyObject from which originates a memoryview object. */
+/* Get a pointer to the exporting object (this may be NULL!). */
#define PyMemoryView_GET_BASE(op) (((PyMemoryViewObject *)(op))->view.obj)
#endif
-
-PyAPI_FUNC(PyObject *) PyMemoryView_GetContiguous(PyObject *base,
- int buffertype,
- char fort);
-
- /* Return a contiguous chunk of memory representing the buffer
- from an object in a memory view object. If a copy is made then the
- base object for the memory view will be a *new* bytes object.
-
- Otherwise, the base-object will be the object itself and no
- data-copying will be done.
-
- The buffertype argument can be PyBUF_READ, PyBUF_WRITE,
- PyBUF_SHADOW to determine whether the returned buffer
- should be READONLY, WRITABLE, or set to update the
- original buffer if a copy must be made. If buffertype is
- PyBUF_WRITE and the buffer is not contiguous an error will
- be raised. In this circumstance, the user can use
- PyBUF_SHADOW to ensure that a a writable temporary
- contiguous buffer is returned. The contents of this
- contiguous buffer will be copied back into the original
- object after the memoryview object is deleted as long as
- the original object is writable and allows setting an
- exclusive write lock. If this is not allowed by the
- original object, then a BufferError is raised.
-
- If the object is multi-dimensional and if fortran is 'F',
- the first dimension of the underlying array will vary the
- fastest in the buffer. If fortran is 'C', then the last
- dimension will vary the fastest (C-style contiguous). If
- fortran is 'A', then it does not matter and you will get
- whatever the object decides is more efficient.
-
- A new reference is returned that must be DECREF'd when finished.
- */
-
PyAPI_FUNC(PyObject *) PyMemoryView_FromObject(PyObject *base);
-
+PyAPI_FUNC(PyObject *) PyMemoryView_FromMemory(char *mem, Py_ssize_t size,
+ int flags);
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) PyMemoryView_FromBuffer(Py_buffer *info);
- /* create new if bufptr is NULL
- will be a new bytesobject in base */
#endif
+PyAPI_FUNC(PyObject *) PyMemoryView_GetContiguous(PyObject *base,
+ int buffertype,
+ char order);
-/* The struct is declared here so that macros can work, but it shouldn't
- be considered public. Don't access those fields directly, use the macros
+/* The structs are declared here so that macros can work, but they shouldn't
+ be considered public. Don't access their fields directly, use the macros
and functions instead! */
#ifndef Py_LIMITED_API
+#define _Py_MANAGED_BUFFER_RELEASED 0x001 /* access to exporter blocked */
+#define _Py_MANAGED_BUFFER_FREE_FORMAT 0x002 /* free format */
typedef struct {
PyObject_HEAD
- Py_buffer view;
+ int flags; /* state flags */
+ Py_ssize_t exports; /* number of direct memoryview exports */
+ Py_buffer master; /* snapshot buffer obtained from the original exporter */
+} _PyManagedBufferObject;
+
+
+/* static storage used for casting between formats */
+#define _Py_MEMORYVIEW_MAX_FORMAT 3 /* must be >= 3 */
+
+/* memoryview state flags */
+#define _Py_MEMORYVIEW_RELEASED 0x001 /* access to master buffer blocked */
+#define _Py_MEMORYVIEW_C 0x002 /* C-contiguous layout */
+#define _Py_MEMORYVIEW_FORTRAN 0x004 /* Fortran contiguous layout */
+#define _Py_MEMORYVIEW_SCALAR 0x008 /* scalar: ndim = 0 */
+#define _Py_MEMORYVIEW_PIL 0x010 /* PIL-style layout */
+
+typedef struct {
+ PyObject_VAR_HEAD
+ _PyManagedBufferObject *mbuf; /* managed buffer */
+ Py_hash_t hash; /* hash value for read-only views */
+ int flags; /* state flags */
+ Py_ssize_t exports; /* number of buffer re-exports */
+ Py_buffer view; /* private copy of the exporter's view */
+ char format[_Py_MEMORYVIEW_MAX_FORMAT]; /* used for casting */
+ PyObject *weakreflist;
+ Py_ssize_t ob_array[1]; /* shape, strides, suboffsets */
} PyMemoryViewObject;
#endif
diff --git a/Include/methodobject.h b/Include/methodobject.h
index 7e67c0b..3cc2ea9 100644
--- a/Include/methodobject.h
+++ b/Include/methodobject.h
@@ -17,7 +17,7 @@ PyAPI_DATA(PyTypeObject) PyCFunction_Type;
typedef PyObject *(*PyCFunction)(PyObject *, PyObject *);
typedef PyObject *(*PyCFunctionWithKeywords)(PyObject *, PyObject *,
- PyObject *);
+ PyObject *);
typedef PyObject *(*PyNoArgsFunction)(PyObject *);
PyAPI_FUNC(PyCFunction) PyCFunction_GetFunction(PyObject *);
@@ -30,24 +30,25 @@ PyAPI_FUNC(int) PyCFunction_GetFlags(PyObject *);
#define PyCFunction_GET_FUNCTION(func) \
(((PyCFunctionObject *)func) -> m_ml -> ml_meth)
#define PyCFunction_GET_SELF(func) \
- (((PyCFunctionObject *)func) -> m_self)
+ (((PyCFunctionObject *)func) -> m_ml -> ml_flags & METH_STATIC ? \
+ NULL : ((PyCFunctionObject *)func) -> m_self)
#define PyCFunction_GET_FLAGS(func) \
- (((PyCFunctionObject *)func) -> m_ml -> ml_flags)
+ (((PyCFunctionObject *)func) -> m_ml -> ml_flags)
#endif
PyAPI_FUNC(PyObject *) PyCFunction_Call(PyObject *, PyObject *, PyObject *);
struct PyMethodDef {
- const char *ml_name; /* The name of the built-in function/method */
- PyCFunction ml_meth; /* The C function that implements it */
- int ml_flags; /* Combination of METH_xxx flags, which mostly
- describe the args expected by the C func */
- const char *ml_doc; /* The __doc__ attribute, or NULL */
+ const char *ml_name; /* The name of the built-in function/method */
+ PyCFunction ml_meth; /* The C function that implements it */
+ int ml_flags; /* Combination of METH_xxx flags, which mostly
+ describe the args expected by the C func */
+ const char *ml_doc; /* The __doc__ attribute, or NULL */
};
typedef struct PyMethodDef PyMethodDef;
#define PyCFunction_New(ML, SELF) PyCFunction_NewEx((ML), (SELF), NULL)
PyAPI_FUNC(PyObject *) PyCFunction_NewEx(PyMethodDef *, PyObject *,
- PyObject *);
+ PyObject *);
/* Flag passed to newmethodobject */
/* #define METH_OLDARGS 0x0000 -- unsupported now */
@@ -81,6 +82,11 @@ typedef struct {
PyAPI_FUNC(int) PyCFunction_ClearFreeList(void);
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(void) _PyCFunction_DebugMallocStats(FILE *out);
+PyAPI_FUNC(void) _PyMethod_DebugMallocStats(FILE *out);
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/Include/modsupport.h b/Include/modsupport.h
index bf6478f..ecf1dcc 100644
--- a/Include/modsupport.h
+++ b/Include/modsupport.h
@@ -23,6 +23,8 @@ extern "C" {
PyAPI_FUNC(PyObject *) _Py_VaBuildValue_SizeT(const char *, va_list);
#endif
+/* Due to a glitch in 3.2, the _SizeT versions weren't exported from the DLL. */
+#if !defined(PY_SSIZE_T_CLEAN) || !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
PyAPI_FUNC(int) PyArg_Parse(PyObject *, const char *, ...);
PyAPI_FUNC(int) PyArg_ParseTuple(PyObject *, const char *, ...) Py_FORMAT_PARSETUPLE(PyArg_ParseTuple, 2, 3);
PyAPI_FUNC(int) PyArg_ParseTupleAndKeywords(PyObject *, PyObject *,
@@ -31,13 +33,14 @@ PyAPI_FUNC(int) PyArg_ValidateKeywordArguments(PyObject *);
PyAPI_FUNC(int) PyArg_UnpackTuple(PyObject *, const char *, Py_ssize_t, Py_ssize_t, ...);
PyAPI_FUNC(PyObject *) Py_BuildValue(const char *, ...);
PyAPI_FUNC(PyObject *) _Py_BuildValue_SizeT(const char *, ...);
+#endif
#ifndef Py_LIMITED_API
PyAPI_FUNC(int) _PyArg_NoKeywords(const char *funcname, PyObject *kw);
-#endif
PyAPI_FUNC(int) PyArg_VaParse(PyObject *, const char *, va_list);
PyAPI_FUNC(int) PyArg_VaParseTupleAndKeywords(PyObject *, PyObject *,
const char *, char **, va_list);
+#endif
PyAPI_FUNC(PyObject *) Py_VaBuildValue(const char *, va_list);
PyAPI_FUNC(int) PyModule_AddObject(PyObject *, const char *, PyObject *);
diff --git a/Include/moduleobject.h b/Include/moduleobject.h
index 7b2bf1c..8013dd9 100644
--- a/Include/moduleobject.h
+++ b/Include/moduleobject.h
@@ -12,10 +12,14 @@ PyAPI_DATA(PyTypeObject) PyModule_Type;
#define PyModule_Check(op) PyObject_TypeCheck(op, &PyModule_Type)
#define PyModule_CheckExact(op) (Py_TYPE(op) == &PyModule_Type)
+PyAPI_FUNC(PyObject *) PyModule_NewObject(
+ PyObject *name
+ );
PyAPI_FUNC(PyObject *) PyModule_New(
const char *name /* UTF-8 encoded string */
);
PyAPI_FUNC(PyObject *) PyModule_GetDict(PyObject *);
+PyAPI_FUNC(PyObject *) PyModule_GetNameObject(PyObject *);
PyAPI_FUNC(const char *) PyModule_GetName(PyObject *);
PyAPI_FUNC(const char *) PyModule_GetFilename(PyObject *);
PyAPI_FUNC(PyObject *) PyModule_GetFilenameObject(PyObject *);
diff --git a/Include/namespaceobject.h b/Include/namespaceobject.h
new file mode 100644
index 0000000..a412f05
--- /dev/null
+++ b/Include/namespaceobject.h
@@ -0,0 +1,17 @@
+
+/* simple namespace object interface */
+
+#ifndef NAMESPACEOBJECT_H
+#define NAMESPACEOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+PyAPI_DATA(PyTypeObject) _PyNamespace_Type;
+
+PyAPI_FUNC(PyObject *) _PyNamespace_New(PyObject *kwds);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !NAMESPACEOBJECT_H */
diff --git a/Include/node.h b/Include/node.h
index 9f6760c..99c13f7 100644
--- a/Include/node.h
+++ b/Include/node.h
@@ -31,6 +31,7 @@ Py_ssize_t _PyNode_SizeOf(node *n);
#define RCHILD(n, i) (CHILD(n, NCH(n) + i))
#define TYPE(n) ((n)->n_type)
#define STR(n) ((n)->n_str)
+#define LINENO(n) ((n)->n_lineno)
/* Assert that the type of a node is what we expect */
#define REQ(n, type) assert(TYPE(n) == (type))
diff --git a/Include/object.h b/Include/object.h
index 6120ab813..387cadb 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -117,6 +117,35 @@ typedef struct {
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
+/********************* String Literals ****************************************/
+/* This structure helps managing static strings. The basic usage goes like this:
+ Instead of doing
+
+ r = PyObject_CallMethod(o, "foo", "args", ...);
+
+ do
+
+ _Py_IDENTIFIER(foo);
+ ...
+ r = _PyObject_CallMethodId(o, &PyId_foo, "args", ...);
+
+ PyId_foo is a static variable, either on block level or file level. On first
+ usage, the string "foo" is interned, and the structures are linked. On interpreter
+ shutdown, all strings are released (through _PyUnicode_ClearStaticStrings).
+
+ Alternatively, _Py_static_string allows to choose the variable name.
+ _PyUnicode_FromId returns a borrowed reference to the interned string.
+ _PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*.
+*/
+typedef struct _Py_Identifier {
+ struct _Py_Identifier *next;
+ const char* string;
+ PyObject *object;
+} _Py_Identifier;
+
+#define _Py_static_string(varname, value) static _Py_Identifier varname = { 0, value, 0 }
+#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname)
+
/*
Type objects contain a string containing the type name (to help somewhat
in debugging), the allocation parameters (see PyObject_New() and
@@ -157,15 +186,16 @@ typedef struct bufferinfo {
Py_ssize_t *shape;
Py_ssize_t *strides;
Py_ssize_t *suboffsets;
- Py_ssize_t smalltable[2]; /* static store for shape and strides of
- mono-dimensional buffers. */
void *internal;
} Py_buffer;
typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
- /* Flags for getting buffers */
+/* Maximum number of dimensions */
+#define PyBUF_MAX_NDIM 64
+
+/* Flags for getting buffers */
#define PyBUF_SIMPLE 0
#define PyBUF_WRITABLE 0x0001
/* we used to include an E, backwards compatible alias */
@@ -403,6 +433,9 @@ typedef struct{
} PyType_Spec;
PyAPI_FUNC(PyObject*) PyType_FromSpec(PyType_Spec*);
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
+PyAPI_FUNC(PyObject*) PyType_FromSpecWithBases(PyType_Spec*, PyObject*);
+#endif
#ifndef Py_LIMITED_API
/* The *real* layout of a type object when allocated on the heap */
@@ -418,7 +451,8 @@ typedef struct _heaptypeobject {
a given operator (e.g. __getitem__).
see add_operators() in typeobject.c . */
PyBufferProcs as_buffer;
- PyObject *ht_name, *ht_slots;
+ PyObject *ht_name, *ht_slots, *ht_qualname;
+ struct _dictkeysobject *ht_cached_keys;
/* here are optional user slots, followed by the members. */
} PyHeapTypeObject;
@@ -448,13 +482,14 @@ PyAPI_FUNC(PyObject *) PyType_GenericNew(PyTypeObject *,
PyObject *, PyObject *);
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *);
-PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, char *, PyObject **);
+PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, _Py_Identifier *);
PyAPI_FUNC(PyTypeObject *) _PyType_CalculateMetaclass(PyTypeObject *, PyObject *);
#endif
PyAPI_FUNC(unsigned int) PyType_ClearCache(void);
PyAPI_FUNC(void) PyType_Modified(PyTypeObject *);
/* Generic operations on objects */
+struct _Py_Identifier;
#ifndef Py_LIMITED_API
PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int);
PyAPI_FUNC(void) _Py_BreakPoint(void);
@@ -472,6 +507,10 @@ PyAPI_FUNC(int) PyObject_HasAttrString(PyObject *, const char *);
PyAPI_FUNC(PyObject *) PyObject_GetAttr(PyObject *, PyObject *);
PyAPI_FUNC(int) PyObject_SetAttr(PyObject *, PyObject *, PyObject *);
PyAPI_FUNC(int) PyObject_HasAttr(PyObject *, PyObject *);
+PyAPI_FUNC(int) _PyObject_IsAbstract(PyObject *);
+PyAPI_FUNC(PyObject *) _PyObject_GetAttrId(PyObject *, struct _Py_Identifier *);
+PyAPI_FUNC(int) _PyObject_SetAttrId(PyObject *, struct _Py_Identifier *, PyObject *);
+PyAPI_FUNC(int) _PyObject_HasAttrId(PyObject *, struct _Py_Identifier *);
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject **) _PyObject_GetDictPtr(PyObject *);
#endif
@@ -482,6 +521,7 @@ PyAPI_FUNC(PyObject *) _PyObject_NextNotImplemented(PyObject *);
PyAPI_FUNC(PyObject *) PyObject_GenericGetAttr(PyObject *, PyObject *);
PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *,
PyObject *, PyObject *);
+PyAPI_FUNC(int) PyObject_GenericSetDict(PyObject *, PyObject *, void *);
PyAPI_FUNC(Py_hash_t) PyObject_Hash(PyObject *);
PyAPI_FUNC(Py_hash_t) PyObject_HashNotImplemented(PyObject *);
PyAPI_FUNC(int) PyObject_IsTrue(PyObject *);
@@ -498,6 +538,11 @@ PyAPI_FUNC(int)
_PyObject_GenericSetAttrWithDict(PyObject *, PyObject *,
PyObject *, PyObject *);
+/* Helper to look up a builtin object */
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject *)
+_PyObject_GetBuiltin(const char *name);
+#endif
/* PyObject_Dir(obj) acts like Python builtins.dir(obj), returning a
list of strings. PyObject_Dir(NULL) is like builtins.dir(),
@@ -515,6 +560,7 @@ PyAPI_FUNC(void) Py_ReprLeave(PyObject *);
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
+PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t);
#endif
typedef struct {
@@ -803,6 +849,10 @@ not implemented for a given type combination.
PyAPI_DATA(PyObject) _Py_NotImplementedStruct; /* Don't use this directly */
#define Py_NotImplemented (&_Py_NotImplementedStruct)
+/* Macro for returning Py_NotImplemented from a function */
+#define Py_RETURN_NOTIMPLEMENTED \
+ return Py_INCREF(Py_NotImplemented), Py_NotImplemented
+
/* Rich comparison opcodes */
#define Py_LT 0
#define Py_LE 1
@@ -912,7 +962,7 @@ with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL.
*/
/* This is the old private API, invoked by the macros before 3.2.4.
- Kept for binary compatibility of extensions. */
+ Kept for binary compatibility of extensions using the stable ABI. */
PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*);
PyAPI_FUNC(void) _PyTrash_destroy_chain(void);
PyAPI_DATA(int) _PyTrash_delete_nesting;
@@ -939,6 +989,14 @@ PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(void);
_PyTrash_thread_deposit_object((PyObject*)op); \
} while (0);
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(void)
+_PyDebugAllocatorStats(FILE *out, const char *block_name, int num_blocks,
+ size_t sizeof_block);
+PyAPI_FUNC(void)
+_PyObject_DebugTypeStats(FILE *out);
+#endif /* ifndef Py_LIMITED_API */
+
#ifdef __cplusplus
}
#endif
diff --git a/Include/objimpl.h b/Include/objimpl.h
index 8fc3fc1..3d5f509 100644
--- a/Include/objimpl.h
+++ b/Include/objimpl.h
@@ -101,13 +101,15 @@ PyAPI_FUNC(void) PyObject_Free(void *);
/* Macros */
#ifdef WITH_PYMALLOC
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(void) _PyObject_DebugMallocStats(FILE *out);
+#endif /* #ifndef Py_LIMITED_API */
#ifdef PYMALLOC_DEBUG /* WITH_PYMALLOC && PYMALLOC_DEBUG */
PyAPI_FUNC(void *) _PyObject_DebugMalloc(size_t nbytes);
PyAPI_FUNC(void *) _PyObject_DebugRealloc(void *p, size_t nbytes);
PyAPI_FUNC(void) _PyObject_DebugFree(void *p);
PyAPI_FUNC(void) _PyObject_DebugDumpAddress(const void *p);
PyAPI_FUNC(void) _PyObject_DebugCheckAddress(const void *p);
-PyAPI_FUNC(void) _PyObject_DebugMallocStats(void);
PyAPI_FUNC(void *) _PyObject_DebugMallocApi(char api, size_t nbytes);
PyAPI_FUNC(void *) _PyObject_DebugReallocApi(char api, void *p, size_t nbytes);
PyAPI_FUNC(void) _PyObject_DebugFreeApi(char api, void *p);
@@ -179,12 +181,9 @@ PyAPI_FUNC(PyVarObject *) _PyObject_NewVar(PyTypeObject *, Py_ssize_t);
#endif
#define _PyObject_VAR_SIZE(typeobj, nitems) \
- (size_t) \
- ( ( (typeobj)->tp_basicsize + \
- (nitems)*(typeobj)->tp_itemsize + \
- (SIZEOF_VOID_P - 1) \
- ) & ~(SIZEOF_VOID_P - 1) \
- )
+ _Py_SIZE_ROUND_UP((typeobj)->tp_basicsize + \
+ (nitems)*(typeobj)->tp_itemsize, \
+ SIZEOF_VOID_P)
#define PyObject_NEW(type, typeobj) \
( (type *) PyObject_Init( \
diff --git a/Include/opcode.h b/Include/opcode.h
index 6b10944..a90184d 100644
--- a/Include/opcode.h
+++ b/Include/opcode.h
@@ -7,117 +7,117 @@ extern "C" {
/* Instruction opcodes for compiled code */
-#define STOP_CODE 0
-#define POP_TOP 1
-#define ROT_TWO 2
-#define ROT_THREE 3
-#define DUP_TOP 4
+#define POP_TOP 1
+#define ROT_TWO 2
+#define ROT_THREE 3
+#define DUP_TOP 4
#define DUP_TOP_TWO 5
-#define NOP 9
+#define NOP 9
-#define UNARY_POSITIVE 10
-#define UNARY_NEGATIVE 11
-#define UNARY_NOT 12
+#define UNARY_POSITIVE 10
+#define UNARY_NEGATIVE 11
+#define UNARY_NOT 12
-#define UNARY_INVERT 15
+#define UNARY_INVERT 15
-#define BINARY_POWER 19
+#define BINARY_POWER 19
-#define BINARY_MULTIPLY 20
+#define BINARY_MULTIPLY 20
-#define BINARY_MODULO 22
-#define BINARY_ADD 23
-#define BINARY_SUBTRACT 24
-#define BINARY_SUBSCR 25
+#define BINARY_MODULO 22
+#define BINARY_ADD 23
+#define BINARY_SUBTRACT 24
+#define BINARY_SUBSCR 25
#define BINARY_FLOOR_DIVIDE 26
#define BINARY_TRUE_DIVIDE 27
#define INPLACE_FLOOR_DIVIDE 28
#define INPLACE_TRUE_DIVIDE 29
-#define STORE_MAP 54
-#define INPLACE_ADD 55
-#define INPLACE_SUBTRACT 56
-#define INPLACE_MULTIPLY 57
-
-#define INPLACE_MODULO 59
-#define STORE_SUBSCR 60
-#define DELETE_SUBSCR 61
-
-#define BINARY_LSHIFT 62
-#define BINARY_RSHIFT 63
-#define BINARY_AND 64
-#define BINARY_XOR 65
-#define BINARY_OR 66
-#define INPLACE_POWER 67
-#define GET_ITER 68
-#define STORE_LOCALS 69
-#define PRINT_EXPR 70
+#define STORE_MAP 54
+#define INPLACE_ADD 55
+#define INPLACE_SUBTRACT 56
+#define INPLACE_MULTIPLY 57
+
+#define INPLACE_MODULO 59
+#define STORE_SUBSCR 60
+#define DELETE_SUBSCR 61
+
+#define BINARY_LSHIFT 62
+#define BINARY_RSHIFT 63
+#define BINARY_AND 64
+#define BINARY_XOR 65
+#define BINARY_OR 66
+#define INPLACE_POWER 67
+#define GET_ITER 68
+#define STORE_LOCALS 69
+#define PRINT_EXPR 70
#define LOAD_BUILD_CLASS 71
-
-#define INPLACE_LSHIFT 75
-#define INPLACE_RSHIFT 76
-#define INPLACE_AND 77
-#define INPLACE_XOR 78
-#define INPLACE_OR 79
-#define BREAK_LOOP 80
+#define YIELD_FROM 72
+
+#define INPLACE_LSHIFT 75
+#define INPLACE_RSHIFT 76
+#define INPLACE_AND 77
+#define INPLACE_XOR 78
+#define INPLACE_OR 79
+#define BREAK_LOOP 80
#define WITH_CLEANUP 81
-#define RETURN_VALUE 83
-#define IMPORT_STAR 84
+#define RETURN_VALUE 83
+#define IMPORT_STAR 84
-#define YIELD_VALUE 86
-#define POP_BLOCK 87
-#define END_FINALLY 88
-#define POP_EXCEPT 89
+#define YIELD_VALUE 86
+#define POP_BLOCK 87
+#define END_FINALLY 88
+#define POP_EXCEPT 89
-#define HAVE_ARGUMENT 90 /* Opcodes from here have an argument: */
+#define HAVE_ARGUMENT 90 /* Opcodes from here have an argument: */
-#define STORE_NAME 90 /* Index in name list */
-#define DELETE_NAME 91 /* "" */
-#define UNPACK_SEQUENCE 92 /* Number of sequence items */
-#define FOR_ITER 93
+#define STORE_NAME 90 /* Index in name list */
+#define DELETE_NAME 91 /* "" */
+#define UNPACK_SEQUENCE 92 /* Number of sequence items */
+#define FOR_ITER 93
#define UNPACK_EX 94 /* Num items before variable part +
(Num items after variable part << 8) */
-#define STORE_ATTR 95 /* Index in name list */
-#define DELETE_ATTR 96 /* "" */
-#define STORE_GLOBAL 97 /* "" */
-#define DELETE_GLOBAL 98 /* "" */
-
-#define LOAD_CONST 100 /* Index in const list */
-#define LOAD_NAME 101 /* Index in name list */
-#define BUILD_TUPLE 102 /* Number of tuple items */
-#define BUILD_LIST 103 /* Number of list items */
-#define BUILD_SET 104 /* Number of set items */
-#define BUILD_MAP 105 /* Always zero for now */
-#define LOAD_ATTR 106 /* Index in name list */
-#define COMPARE_OP 107 /* Comparison operator */
-#define IMPORT_NAME 108 /* Index in name list */
-#define IMPORT_FROM 109 /* Index in name list */
-
-#define JUMP_FORWARD 110 /* Number of bytes to skip */
-#define JUMP_IF_FALSE_OR_POP 111 /* Target byte offset from beginning of code */
-#define JUMP_IF_TRUE_OR_POP 112 /* "" */
-#define JUMP_ABSOLUTE 113 /* "" */
-#define POP_JUMP_IF_FALSE 114 /* "" */
-#define POP_JUMP_IF_TRUE 115 /* "" */
-
-#define LOAD_GLOBAL 116 /* Index in name list */
-
-#define CONTINUE_LOOP 119 /* Start of loop (absolute) */
-#define SETUP_LOOP 120 /* Target address (relative) */
-#define SETUP_EXCEPT 121 /* "" */
-#define SETUP_FINALLY 122 /* "" */
-
-#define LOAD_FAST 124 /* Local variable number */
-#define STORE_FAST 125 /* Local variable number */
-#define DELETE_FAST 126 /* Local variable number */
-
-#define RAISE_VARARGS 130 /* Number of raise arguments (1, 2 or 3) */
+#define STORE_ATTR 95 /* Index in name list */
+#define DELETE_ATTR 96 /* "" */
+#define STORE_GLOBAL 97 /* "" */
+#define DELETE_GLOBAL 98 /* "" */
+
+#define LOAD_CONST 100 /* Index in const list */
+#define LOAD_NAME 101 /* Index in name list */
+#define BUILD_TUPLE 102 /* Number of tuple items */
+#define BUILD_LIST 103 /* Number of list items */
+#define BUILD_SET 104 /* Number of set items */
+#define BUILD_MAP 105 /* Always zero for now */
+#define LOAD_ATTR 106 /* Index in name list */
+#define COMPARE_OP 107 /* Comparison operator */
+#define IMPORT_NAME 108 /* Index in name list */
+#define IMPORT_FROM 109 /* Index in name list */
+
+#define JUMP_FORWARD 110 /* Number of bytes to skip */
+#define JUMP_IF_FALSE_OR_POP 111 /* Target byte offset from beginning of code */
+#define JUMP_IF_TRUE_OR_POP 112 /* "" */
+#define JUMP_ABSOLUTE 113 /* "" */
+#define POP_JUMP_IF_FALSE 114 /* "" */
+#define POP_JUMP_IF_TRUE 115 /* "" */
+
+#define LOAD_GLOBAL 116 /* Index in name list */
+
+#define CONTINUE_LOOP 119 /* Start of loop (absolute) */
+#define SETUP_LOOP 120 /* Target address (relative) */
+#define SETUP_EXCEPT 121 /* "" */
+#define SETUP_FINALLY 122 /* "" */
+
+#define LOAD_FAST 124 /* Local variable number */
+#define STORE_FAST 125 /* Local variable number */
+#define DELETE_FAST 126 /* Local variable number */
+
+#define RAISE_VARARGS 130 /* Number of raise arguments (1, 2 or 3) */
/* CALL_FUNCTION_XXX opcodes defined below depend on this definition */
-#define CALL_FUNCTION 131 /* #args + (#kwargs<<8) */
-#define MAKE_FUNCTION 132 /* #defaults + #kwdefaults<<8 + #annotations<<16 */
-#define BUILD_SLICE 133 /* Number of items */
+#define CALL_FUNCTION 131 /* #args + (#kwargs<<8) */
+#define MAKE_FUNCTION 132 /* #defaults + #kwdefaults<<8 + #annotations<<16 */
+#define BUILD_SLICE 133 /* Number of items */
#define MAKE_CLOSURE 134 /* same as MAKE_FUNCTION */
#define LOAD_CLOSURE 135 /* Load free variable from closure */
@@ -127,9 +127,9 @@ extern "C" {
/* The next 3 opcodes must be contiguous and satisfy
(CALL_FUNCTION_VAR - CALL_FUNCTION) & 3 == 1 */
-#define CALL_FUNCTION_VAR 140 /* #args + (#kwargs<<8) */
-#define CALL_FUNCTION_KW 141 /* #args + (#kwargs<<8) */
-#define CALL_FUNCTION_VAR_KW 142 /* #args + (#kwargs<<8) */
+#define CALL_FUNCTION_VAR 140 /* #args + (#kwargs<<8) */
+#define CALL_FUNCTION_KW 141 /* #args + (#kwargs<<8) */
+#define CALL_FUNCTION_VAR_KW 142 /* #args + (#kwargs<<8) */
#define SETUP_WITH 143
@@ -149,7 +149,7 @@ extern "C" {
enum cmp_op {PyCmp_LT=Py_LT, PyCmp_LE=Py_LE, PyCmp_EQ=Py_EQ, PyCmp_NE=Py_NE, PyCmp_GT=Py_GT, PyCmp_GE=Py_GE,
- PyCmp_IN, PyCmp_NOT_IN, PyCmp_IS, PyCmp_IS_NOT, PyCmp_EXC_MATCH, PyCmp_BAD};
+ PyCmp_IN, PyCmp_NOT_IN, PyCmp_IS, PyCmp_IS_NOT, PyCmp_EXC_MATCH, PyCmp_BAD};
#define HAS_ARG(op) ((op) >= HAVE_ARGUMENT)
diff --git a/Include/parsetok.h b/Include/parsetok.h
index 4b7694f..911dfc1 100644
--- a/Include/parsetok.h
+++ b/Include/parsetok.h
@@ -9,7 +9,10 @@ extern "C" {
typedef struct {
int error;
- const char *filename; /* decoded from the filesystem encoding */
+#ifndef PGEN
+ /* The filename is useless for pgen, see comment in tok_state structure */
+ PyObject *filename;
+#endif
int lineno;
int offset;
char *text; /* UTF-8-encoded string */
@@ -66,8 +69,10 @@ PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(
perrdetail *err_ret,
int *flags);
-/* Note that he following function is defined in pythonrun.c not parsetok.c. */
+/* Note that the following functions are defined in pythonrun.c,
+ not in parsetok.c */
PyAPI_FUNC(void) PyParser_SetError(perrdetail *);
+PyAPI_FUNC(void) PyParser_ClearError(perrdetail *);
#ifdef __cplusplus
}
diff --git a/Include/patchlevel.h b/Include/patchlevel.h
index 0839071..b5919a4 100644
--- a/Include/patchlevel.h
+++ b/Include/patchlevel.h
@@ -17,19 +17,15 @@
/* Version parsed out into numeric values */
/*--start constants--*/
#define PY_MAJOR_VERSION 3
-#define PY_MINOR_VERSION 2
-#define PY_MICRO_VERSION 3
+#define PY_MINOR_VERSION 3
+#define PY_MICRO_VERSION 0
#define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_FINAL
#define PY_RELEASE_SERIAL 0
/* Version as a string */
-#define PY_VERSION "3.2.3+"
+#define PY_VERSION "3.3.0+"
/*--end constants--*/
-/* Subversion Revision number of this file (not of the repository). Empty
- since Mercurial migration. */
-#define PY_PATCHLEVEL_REVISION ""
-
/* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2.
Use this for numeric comparisons, e.g. #if PY_VERSION_HEX >= ... */
#define PY_VERSION_HEX ((PY_MAJOR_VERSION << 24) | \
diff --git a/Include/py_curses.h b/Include/py_curses.h
index a891c42..f2c08f6 100644
--- a/Include/py_curses.h
+++ b/Include/py_curses.h
@@ -76,6 +76,7 @@ extern "C" {
typedef struct {
PyObject_HEAD
WINDOW *win;
+ char *encoding;
} PyCursesWindowObject;
#define PyCursesWindow_Check(v) (Py_TYPE(v) == &PyCursesWindow_Type)
diff --git a/Include/pydebug.h b/Include/pydebug.h
index e23cbdc..97c2f8c 100644
--- a/Include/pydebug.h
+++ b/Include/pydebug.h
@@ -16,7 +16,6 @@ PyAPI_DATA(int) Py_BytesWarningFlag;
PyAPI_DATA(int) Py_UseClassExceptionsFlag;
PyAPI_DATA(int) Py_FrozenFlag;
PyAPI_DATA(int) Py_IgnoreEnvironmentFlag;
-PyAPI_DATA(int) Py_DivisionWarningFlag;
PyAPI_DATA(int) Py_DontWriteBytecodeFlag;
PyAPI_DATA(int) Py_NoUserSiteDirectory;
PyAPI_DATA(int) Py_UnbufferedStdioFlag;
@@ -27,8 +26,6 @@ PyAPI_DATA(int) Py_HashRandomizationFlag;
PYTHONPATH and PYTHONHOME from the environment */
#define Py_GETENV(s) (Py_IgnoreEnvironmentFlag ? NULL : getenv(s))
-PyAPI_FUNC(void) Py_FatalError(const char *message);
-
#ifdef __cplusplus
}
#endif
diff --git a/Include/pyerrors.h b/Include/pyerrors.h
index 4bb3c01..af37410 100644
--- a/Include/pyerrors.h
+++ b/Include/pyerrors.h
@@ -10,7 +10,8 @@ extern "C" {
/* PyException_HEAD defines the initial segment of every exception class. */
#define PyException_HEAD PyObject_HEAD PyObject *dict;\
PyObject *args; PyObject *traceback;\
- PyObject *context; PyObject *cause;
+ PyObject *context; PyObject *cause;\
+ char suppress_context;
typedef struct {
PyException_HEAD
@@ -28,6 +29,13 @@ typedef struct {
typedef struct {
PyException_HEAD
+ PyObject *msg;
+ PyObject *name;
+ PyObject *path;
+} PyImportErrorObject;
+
+typedef struct {
+ PyException_HEAD
PyObject *encoding;
PyObject *object;
Py_ssize_t start;
@@ -45,18 +53,23 @@ typedef struct {
PyObject *myerrno;
PyObject *strerror;
PyObject *filename;
-} PyEnvironmentErrorObject;
-
#ifdef MS_WINDOWS
-typedef struct {
- PyException_HEAD
- PyObject *myerrno;
- PyObject *strerror;
- PyObject *filename;
PyObject *winerror;
-} PyWindowsErrorObject;
#endif
+ Py_ssize_t written; /* only for BlockingIOError, -1 otherwise */
+} PyOSErrorObject;
+
+typedef struct {
+ PyException_HEAD
+ PyObject *value;
+} PyStopIterationObject;
+
+/* Compatibility typedefs */
+typedef PyOSErrorObject PyEnvironmentErrorObject;
+#ifdef MS_WINDOWS
+typedef PyOSErrorObject PyWindowsErrorObject;
#endif
+#endif /* !Py_LIMITED_API */
/* Error handling definitions */
@@ -70,7 +83,19 @@ PyAPI_FUNC(PyObject *) PyErr_Occurred(void);
PyAPI_FUNC(void) PyErr_Clear(void);
PyAPI_FUNC(void) PyErr_Fetch(PyObject **, PyObject **, PyObject **);
PyAPI_FUNC(void) PyErr_Restore(PyObject *, PyObject *, PyObject *);
-PyAPI_FUNC(void) Py_FatalError(const char *message);
+PyAPI_FUNC(void) PyErr_GetExcInfo(PyObject **, PyObject **, PyObject **);
+PyAPI_FUNC(void) PyErr_SetExcInfo(PyObject *, PyObject *, PyObject *);
+
+#if defined(__clang__) || \
+    (defined(__GNUC__) && \
+     ((__GNUC__ >= 3) || \
+      ((__GNUC__ == 2) && (__GNUC_MINOR__ >= 5))))
+#define _Py_NO_RETURN __attribute__((__noreturn__))
+#else /* compiler not known to support __attribute__((__noreturn__)) */
+#define _Py_NO_RETURN
+#endif
+
+PyAPI_FUNC(void) Py_FatalError(const char *message) _Py_NO_RETURN;
#if defined(Py_DEBUG) || defined(Py_LIMITED_API)
#define _PyErr_OCCURRED() PyErr_Occurred()
@@ -122,10 +147,9 @@ PyAPI_DATA(PyObject *) PyExc_LookupError;
PyAPI_DATA(PyObject *) PyExc_AssertionError;
PyAPI_DATA(PyObject *) PyExc_AttributeError;
+PyAPI_DATA(PyObject *) PyExc_BufferError;
PyAPI_DATA(PyObject *) PyExc_EOFError;
PyAPI_DATA(PyObject *) PyExc_FloatingPointError;
-PyAPI_DATA(PyObject *) PyExc_EnvironmentError;
-PyAPI_DATA(PyObject *) PyExc_IOError;
PyAPI_DATA(PyObject *) PyExc_OSError;
PyAPI_DATA(PyObject *) PyExc_ImportError;
PyAPI_DATA(PyObject *) PyExc_IndexError;
@@ -150,6 +174,27 @@ PyAPI_DATA(PyObject *) PyExc_UnicodeDecodeError;
PyAPI_DATA(PyObject *) PyExc_UnicodeTranslateError;
PyAPI_DATA(PyObject *) PyExc_ValueError;
PyAPI_DATA(PyObject *) PyExc_ZeroDivisionError;
+
+PyAPI_DATA(PyObject *) PyExc_BlockingIOError;
+PyAPI_DATA(PyObject *) PyExc_BrokenPipeError;
+PyAPI_DATA(PyObject *) PyExc_ChildProcessError;
+PyAPI_DATA(PyObject *) PyExc_ConnectionError;
+PyAPI_DATA(PyObject *) PyExc_ConnectionAbortedError;
+PyAPI_DATA(PyObject *) PyExc_ConnectionRefusedError;
+PyAPI_DATA(PyObject *) PyExc_ConnectionResetError;
+PyAPI_DATA(PyObject *) PyExc_FileExistsError;
+PyAPI_DATA(PyObject *) PyExc_FileNotFoundError;
+PyAPI_DATA(PyObject *) PyExc_InterruptedError;
+PyAPI_DATA(PyObject *) PyExc_IsADirectoryError;
+PyAPI_DATA(PyObject *) PyExc_NotADirectoryError;
+PyAPI_DATA(PyObject *) PyExc_PermissionError;
+PyAPI_DATA(PyObject *) PyExc_ProcessLookupError;
+PyAPI_DATA(PyObject *) PyExc_TimeoutError;
+
+
+/* Compatibility aliases */
+PyAPI_DATA(PyObject *) PyExc_EnvironmentError;
+PyAPI_DATA(PyObject *) PyExc_IOError;
#ifdef MS_WINDOWS
PyAPI_DATA(PyObject *) PyExc_WindowsError;
#endif
@@ -157,8 +202,6 @@ PyAPI_DATA(PyObject *) PyExc_WindowsError;
PyAPI_DATA(PyObject *) PyExc_VMSError;
#endif
-PyAPI_DATA(PyObject *) PyExc_BufferError;
-
PyAPI_DATA(PyObject *) PyExc_RecursionErrorInst;
/* Predefined warning categories */
@@ -198,8 +241,6 @@ PyAPI_FUNC(PyObject *) PyErr_Format(
);
#ifdef MS_WINDOWS
-PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilenameObject(
- int, const char *);
PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilename(
int ierr,
const char *filename /* decoded from the filesystem encoding */
@@ -224,6 +265,11 @@ PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithUnicodeFilename(
PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErr(PyObject *, int);
#endif /* MS_WINDOWS */
+PyAPI_FUNC(PyObject *) PyErr_SetExcWithArgsKwargs(PyObject *, PyObject *,
+ PyObject *);
+PyAPI_FUNC(PyObject *) PyErr_SetImportError(PyObject *, PyObject *,
+ PyObject *);
+
/* Export the old function so that the existing API remains available: */
PyAPI_FUNC(void) PyErr_BadInternalCall(void);
PyAPI_FUNC(void) _PyErr_BadInternalCall(const char *filename, int lineno);
@@ -293,6 +339,12 @@ PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create(
Py_ssize_t end,
const char *reason /* UTF-8 encoded string */
);
+PyAPI_FUNC(PyObject *) _PyUnicodeTranslateError_Create(
+ PyObject *object,
+ Py_ssize_t start,
+ Py_ssize_t end,
+ const char *reason /* UTF-8 encoded string */
+ );
#endif
/* get the encoding attribute */
@@ -348,7 +400,6 @@ PyAPI_FUNC(int) PyUnicodeTranslateError_SetReason(
const char *reason /* UTF-8 encoded string */
);
-
/* These APIs aren't really part of the error implementation, but
often needed to format error messages; the native C lib APIs are
not available on all platforms, which is why we provide emulations
diff --git a/Include/pyexpat.h b/Include/pyexpat.h
index 5340ef5..168b5b2 100644
--- a/Include/pyexpat.h
+++ b/Include/pyexpat.h
@@ -43,6 +43,8 @@ struct PyExpat_CAPI
XML_Parser parser, XML_UnknownEncodingHandler handler,
void *encodingHandlerData);
void (*SetUserData)(XML_Parser parser, void *userData);
+ void (*SetStartDoctypeDeclHandler)(XML_Parser parser,
+ XML_StartDoctypeDeclHandler start);
/* always add new stuff to the end! */
};
diff --git a/Include/pymacro.h b/Include/pymacro.h
new file mode 100644
index 0000000..52e8ee3
--- /dev/null
+++ b/Include/pymacro.h
@@ -0,0 +1,69 @@
+#ifndef Py_PYMACRO_H
+#define Py_PYMACRO_H
+
+#define Py_MIN(x, y) (((x) > (y)) ? (y) : (x))
+#define Py_MAX(x, y) (((x) > (y)) ? (x) : (y))
+
+/* Argument must be a char or an int in [-128, 127] or [0, 255]. */
+#define Py_CHARMASK(c) ((unsigned char)((c) & 0xff))
+
+
+/* Assert a build-time dependency, as an expression.
+
+ Your compile will fail if the condition isn't true, or can't be evaluated
+ by the compiler. This can be used in an expression: its value is 0.
+
+ Example:
+
+ #define foo_to_char(foo) \
+ ((char *)(foo) \
+ + Py_BUILD_ASSERT_EXPR(offsetof(struct foo, string) == 0))
+
+ Written by Rusty Russell, public domain, http://ccodearchive.net/ */
+#define Py_BUILD_ASSERT_EXPR(cond) \
+ (sizeof(char [1 - 2*!(cond)]) - 1)
+
+/* Get the number of elements in a visible array
+
+ This does not work on pointers, or arrays declared as [], or function
+ parameters. With correct compiler support, such usage will cause a build
+ error (see Py_BUILD_ASSERT_EXPR).
+
+ Written by Rusty Russell, public domain, http://ccodearchive.net/ */
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+/* Two gcc extensions.
+ &a[0] degrades to a pointer: a different type from an array */
+#define Py_ARRAY_LENGTH(array) \
+ (sizeof(array) / sizeof((array)[0]) \
+ + Py_BUILD_ASSERT_EXPR(!__builtin_types_compatible_p(typeof(array), \
+ typeof(&(array)[0]))))
+#else
+#define Py_ARRAY_LENGTH(array) \
+ (sizeof(array) / sizeof((array)[0]))
+#endif
+
+
+/* Define macros for inline documentation. */
+#define PyDoc_VAR(name) static char name[]
+#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
+#ifdef WITH_DOC_STRINGS
+#define PyDoc_STR(str) str
+#else
+#define PyDoc_STR(str) ""
+#endif
+
+/* Below "a" is a power of 2. */
+/* Round down size "n" to be a multiple of "a". */
+#define _Py_SIZE_ROUND_DOWN(n, a) ((size_t)(n) & ~(size_t)((a) - 1))
+/* Round up size "n" to be a multiple of "a". */
+#define _Py_SIZE_ROUND_UP(n, a) (((size_t)(n) + \
+ (size_t)((a) - 1)) & ~(size_t)((a) - 1))
+/* Round pointer "p" down to the closest "a"-aligned address <= "p". */
+#define _Py_ALIGN_DOWN(p, a) ((void *)((Py_uintptr_t)(p) & ~(Py_uintptr_t)((a) - 1)))
+/* Round pointer "p" up to the closest "a"-aligned address >= "p". */
+#define _Py_ALIGN_UP(p, a) ((void *)(((Py_uintptr_t)(p) + \
+ (Py_uintptr_t)((a) - 1)) & ~(Py_uintptr_t)((a) - 1)))
+/* Check if pointer "p" is aligned to "a"-bytes boundary. */
+#define _Py_IS_ALIGNED(p, a) (!((Py_uintptr_t)(p) & (Py_uintptr_t)((a) - 1)))
+
+#endif /* Py_PYMACRO_H */
diff --git a/Include/pymath.h b/Include/pymath.h
index b4eda66..62a6c42 100644
--- a/Include/pymath.h
+++ b/Include/pymath.h
@@ -37,12 +37,6 @@ extern double pow(double, double);
#endif /* __STDC__ */
#endif /* _MSC_VER */
-#ifdef _OSF_SOURCE
-/* OSF1 5.1 doesn't make these available with XOPEN_SOURCE_EXTENDED defined */
-extern int finite(double);
-extern double copysign(double, double);
-#endif
-
/* High precision defintion of pi and e (Euler)
* The values are taken from libc6's math.h.
*/
diff --git a/Include/pyport.h b/Include/pyport.h
index 4ef0db2..eba34f9 100644
--- a/Include/pyport.h
+++ b/Include/pyport.h
@@ -135,7 +135,7 @@ Used in: PY_LONG_LONG
#define _PyHASH_MULTIPLIER 1000003 /* 0xf4243 */
/* Parameters used for the numeric hash implementation. See notes for
- _PyHash_Double in Objects/object.c. Numeric hashes are based on
+ _Py_HashDouble in Objects/object.c. Numeric hashes are based on
reduction modulo the prime 2**_PyHASH_BITS - 1. */
#if SIZEOF_VOID_P >= 8
diff --git a/Include/pystate.h b/Include/pystate.h
index 060efa7..2017b02 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -25,7 +25,7 @@ typedef struct _is {
PyObject *modules_by_index;
PyObject *sysdict;
PyObject *builtins;
- PyObject *modules_reloading;
+ PyObject *importlib;
PyObject *codec_search_path;
PyObject *codec_search_cache;
@@ -33,6 +33,7 @@ typedef struct _is {
int codecs_initialized;
int fscodec_initialized;
+
#ifdef HAVE_DLOPEN
int dlopenflags;
#endif
@@ -74,9 +75,9 @@ typedef struct _ts {
struct _frame *frame;
int recursion_depth;
char overflowed; /* The stack has overflowed. Allow 50 more calls
- to handle the runtime error. */
- char recursion_critical; /* The current calls must not cause
- a stack overflow. */
+ to handle the runtime error. */
+ char recursion_critical; /* The current calls must not cause
+ a stack overflow. */
/* 'tracing' keeps track of the execution depth when tracing/profiling.
This is to prevent the actual trace/profile code from being recorded in
the trace/profile. */
@@ -126,6 +127,11 @@ PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void);
PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *);
PyAPI_FUNC(void) PyInterpreterState_Delete(PyInterpreterState *);
PyAPI_FUNC(int) _PyState_AddModule(PyObject*, struct PyModuleDef*);
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
+/* New in 3.3 */
+PyAPI_FUNC(int) PyState_AddModule(PyObject*, struct PyModuleDef*);
+PyAPI_FUNC(int) PyState_RemoveModule(struct PyModuleDef*);
+#endif
PyAPI_FUNC(PyObject*) PyState_FindModule(struct PyModuleDef*);
PyAPI_FUNC(PyThreadState *) PyThreadState_New(PyInterpreterState *);
@@ -163,6 +169,8 @@ typedef
enum {PyGILState_LOCKED, PyGILState_UNLOCKED}
PyGILState_STATE;
+#ifdef WITH_THREAD
+
/* Ensure that the current thread is ready to call the Python
C API, regardless of the current state of Python, or of its
thread lock. This may be called as many times as desired
@@ -204,6 +212,8 @@ PyAPI_FUNC(void) PyGILState_Release(PyGILState_STATE);
*/
PyAPI_FUNC(PyThreadState *) PyGILState_GetThisThreadState(void);
+#endif /* #ifdef WITH_THREAD */
+
/* The implementation of sys._current_frames() Returns a dict mapping
thread id to that thread's current frame.
*/
diff --git a/Include/pythonrun.h b/Include/pythonrun.h
index e244ce7..4d24b2d 100644
--- a/Include/pythonrun.h
+++ b/Include/pythonrun.h
@@ -30,6 +30,9 @@ PyAPI_FUNC(wchar_t *) Py_GetPythonHome(void);
PyAPI_FUNC(void) Py_Initialize(void);
PyAPI_FUNC(void) Py_InitializeEx(int);
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(void) _Py_InitializeEx_Private(int, int);
+#endif
PyAPI_FUNC(void) Py_Finalize(void);
PyAPI_FUNC(int) Py_IsInitialized(void);
PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void);
@@ -82,9 +85,12 @@ PyAPI_FUNC(struct _mod *) PyParser_ASTFromFile(
PyParser_SimpleParseFileFlags(FP, S, B, 0)
#endif
PyAPI_FUNC(struct _node *) PyParser_SimpleParseStringFlags(const char *, int,
- int);
+ int);
+PyAPI_FUNC(struct _node *) PyParser_SimpleParseStringFlagsFilename(const char *,
+ const char *,
+ int, int);
PyAPI_FUNC(struct _node *) PyParser_SimpleParseFileFlags(FILE *, const char *,
- int, int);
+ int, int);
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject *) PyRun_StringFlags(const char *, int, PyObject *,
@@ -179,9 +185,6 @@ PyAPI_FUNC(const char *) Py_GetCopyright(void);
PyAPI_FUNC(const char *) Py_GetCompiler(void);
PyAPI_FUNC(const char *) Py_GetBuildInfo(void);
#ifndef Py_LIMITED_API
-PyAPI_FUNC(const char *) _Py_svnversion(void);
-PyAPI_FUNC(const char *) Py_SubversionRevision(void);
-PyAPI_FUNC(const char *) Py_SubversionShortBranch(void);
PyAPI_FUNC(const char *) _Py_hgidentifier(void);
PyAPI_FUNC(const char *) _Py_hgversion(void);
#endif
@@ -191,7 +194,7 @@ PyAPI_FUNC(const char *) _Py_hgversion(void);
PyAPI_FUNC(PyObject *) _PyBuiltin_Init(void);
PyAPI_FUNC(PyObject *) _PySys_Init(void);
PyAPI_FUNC(void) _PyImport_Init(void);
-PyAPI_FUNC(void) _PyExc_Init(void);
+PyAPI_FUNC(void) _PyExc_Init(PyObject * bltinmod);
PyAPI_FUNC(void) _PyImportHooks_Init(void);
PyAPI_FUNC(int) _PyFrame_Init(void);
PyAPI_FUNC(void) _PyFloat_Init(void);
@@ -215,6 +218,7 @@ PyAPI_FUNC(void) PyByteArray_Fini(void);
PyAPI_FUNC(void) PyFloat_Fini(void);
PyAPI_FUNC(void) PyOS_FiniInterrupts(void);
PyAPI_FUNC(void) _PyGC_Fini(void);
+PyAPI_FUNC(void) PySlice_Fini(void);
PyAPI_DATA(PyThreadState *) _Py_Finalizing;
#endif
diff --git a/Include/pythread.h b/Include/pythread.h
index 9806c61..6e9f303 100644
--- a/Include/pythread.h
+++ b/Include/pythread.h
@@ -32,7 +32,7 @@ PyAPI_FUNC(int) PyThread_acquire_lock(PyThread_type_lock, int);
on a lock (see PyThread_acquire_lock_timed() below).
PY_TIMEOUT_MAX is the highest usable value (in microseconds) of that
type, and depends on the system threading API.
-
+
NOTE: this isn't the same value as `_thread.TIMEOUT_MAX`. The _thread
module exposes a higher-level API, with timeouts expressed in seconds
and floating-point numbers allowed.
@@ -74,6 +74,8 @@ PyAPI_FUNC(void) PyThread_release_lock(PyThread_type_lock);
PyAPI_FUNC(size_t) PyThread_get_stacksize(void);
PyAPI_FUNC(int) PyThread_set_stacksize(size_t);
+PyAPI_FUNC(PyObject*) PyThread_GetInfo(void);
+
/* Thread Local Storage (TLS) API */
PyAPI_FUNC(int) PyThread_create_key(void);
PyAPI_FUNC(void) PyThread_delete_key(int);
diff --git a/Include/pytime.h b/Include/pytime.h
index d707bdb..52902f5 100644
--- a/Include/pytime.h
+++ b/Include/pytime.h
@@ -3,6 +3,7 @@
#define Py_PYTIME_H
#include "pyconfig.h" /* include for defines */
+#include "object.h"
/**************************************************************************
Symbols and macros to supply platform-independent interfaces to time related
@@ -21,11 +22,25 @@ typedef struct {
} _PyTime_timeval;
#endif
+/* Structure used by time.get_clock_info() */
+typedef struct {
+ const char *implementation;
+ int monotonic;
+ int adjustable;
+ double resolution;
+} _Py_clock_info_t;
+
/* Similar to POSIX gettimeofday but cannot fail. If system gettimeofday
* fails or is not available, fall back to lower resolution clocks.
*/
PyAPI_FUNC(void) _PyTime_gettimeofday(_PyTime_timeval *tp);
+/* Similar to _PyTime_gettimeofday(), but also retrieves information about
+ * the clock used to get the current time. */
+PyAPI_FUNC(void) _PyTime_gettimeofday_info(
+ _PyTime_timeval *tp,
+ _Py_clock_info_t *info);
+
#define _PyTime_ADD_SECONDS(tv, interval) \
do { \
tv.tv_usec += (long) (((long) interval - interval) * 1000000); \
@@ -37,6 +52,37 @@ do { \
((tv_end.tv_sec - tv_start.tv_sec) + \
(tv_end.tv_usec - tv_start.tv_usec) * 0.000001)
+#ifndef Py_LIMITED_API
+/* Convert a number of seconds, int or float, to time_t. */
+PyAPI_FUNC(int) _PyTime_ObjectToTime_t(
+ PyObject *obj,
+ time_t *sec);
+
+/* Convert a time_t to a PyLong. */
+PyAPI_FUNC(PyObject *) _PyLong_FromTime_t(
+ time_t sec);
+
+/* Convert a PyLong to a time_t. */
+PyAPI_FUNC(time_t) _PyLong_AsTime_t(
+ PyObject *obj);
+
+/* Convert a number of seconds, int or float, to a timeval structure.
+ usec is in the range [0; 999999] and rounded towards zero.
+ For example, -1.2 is converted to (-2, 800000). */
+PyAPI_FUNC(int) _PyTime_ObjectToTimeval(
+ PyObject *obj,
+ time_t *sec,
+ long *usec);
+
+/* Convert a number of seconds, int or float, to a timespec structure.
+ nsec is in the range [0; 999999999] and rounded towards zero.
+ For example, -1.2 is converted to (-2, 800000000). */
+PyAPI_FUNC(int) _PyTime_ObjectToTimespec(
+ PyObject *obj,
+ time_t *sec,
+ long *nsec);
+#endif
+
/* Dummy to force linking. */
PyAPI_FUNC(void) _PyTime_Init(void);
diff --git a/Include/setobject.h b/Include/setobject.h
index 6234111..a14874b 100644
--- a/Include/setobject.h
+++ b/Include/setobject.h
@@ -99,6 +99,9 @@ PyAPI_FUNC(int) _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key,
PyAPI_FUNC(PyObject *) PySet_Pop(PyObject *set);
#ifndef Py_LIMITED_API
PyAPI_FUNC(int) _PySet_Update(PyObject *set, PyObject *iterable);
+
+PyAPI_FUNC(int) PySet_ClearFreeList(void);
+PyAPI_FUNC(void) _PySet_DebugMallocStats(FILE *out);
#endif
#ifdef __cplusplus
diff --git a/Include/structmember.h b/Include/structmember.h
index 0b85b2a..948f690 100644
--- a/Include/structmember.h
+++ b/Include/structmember.h
@@ -9,16 +9,6 @@ extern "C" {
#include <stddef.h> /* For offsetof */
-/* The offsetof() macro calculates the offset of a structure member
- in its structure. Unfortunately this cannot be written down
- portably, hence it is provided by a Standard C header file.
- For pre-Standard C compilers, here is a version that usually works
- (but watch out!): */
-
-#ifndef offsetof
-#define offsetof(type, member) ( (int) & ((type*)0) -> member )
-#endif
-
/* An array of PyMemberDef structures defines the name, type and offset
of selected members of a C structure. These can be read by
PyMember_GetOne() and set by PyMember_SetOne() (except if their READONLY
@@ -26,42 +16,41 @@ extern "C" {
pointer is NULL. */
typedef struct PyMemberDef {
- /* Current version, use this */
- char *name;
- int type;
- Py_ssize_t offset;
- int flags;
- char *doc;
+ char *name;
+ int type;
+ Py_ssize_t offset;
+ int flags;
+ char *doc;
} PyMemberDef;
/* Types */
-#define T_SHORT 0
-#define T_INT 1
-#define T_LONG 2
-#define T_FLOAT 3
-#define T_DOUBLE 4
-#define T_STRING 5
-#define T_OBJECT 6
+#define T_SHORT 0
+#define T_INT 1
+#define T_LONG 2
+#define T_FLOAT 3
+#define T_DOUBLE 4
+#define T_STRING 5
+#define T_OBJECT 6
/* XXX the ordering here is weird for binary compatibility */
-#define T_CHAR 7 /* 1-character string */
-#define T_BYTE 8 /* 8-bit signed int */
+#define T_CHAR 7 /* 1-character string */
+#define T_BYTE 8 /* 8-bit signed int */
/* unsigned variants: */
-#define T_UBYTE 9
-#define T_USHORT 10
-#define T_UINT 11
-#define T_ULONG 12
+#define T_UBYTE 9
+#define T_USHORT 10
+#define T_UINT 11
+#define T_ULONG 12
/* Added by Jack: strings contained in the structure */
-#define T_STRING_INPLACE 13
+#define T_STRING_INPLACE 13
/* Added by Lillo: bools contained in the structure (assumed char) */
-#define T_BOOL 14
+#define T_BOOL 14
-#define T_OBJECT_EX 16 /* Like T_OBJECT, but raises AttributeError
- when the value is NULL, instead of
- converting to None. */
+#define T_OBJECT_EX 16 /* Like T_OBJECT, but raises AttributeError
+ when the value is NULL, instead of
+ converting to None. */
#ifdef HAVE_LONG_LONG
-#define T_LONGLONG 17
+#define T_LONGLONG 17
#define T_ULONGLONG 18
#endif /* HAVE_LONG_LONG */
@@ -70,10 +59,10 @@ typedef struct PyMemberDef {
/* Flags */
-#define READONLY 1
-#define READ_RESTRICTED 2
+#define READONLY 1
+#define READ_RESTRICTED 2
#define PY_WRITE_RESTRICTED 4
-#define RESTRICTED (READ_RESTRICTED | PY_WRITE_RESTRICTED)
+#define RESTRICTED (READ_RESTRICTED | PY_WRITE_RESTRICTED)
/* Current API, use this */
diff --git a/Include/symtable.h b/Include/symtable.h
index fd7de04..82f6269 100644
--- a/Include/symtable.h
+++ b/Include/symtable.h
@@ -23,10 +23,13 @@ struct symtable {
PyObject *st_blocks; /* dict: map AST node addresses
* to symbol table entries */
PyObject *st_stack; /* list: stack of namespace info */
- PyObject *st_global; /* borrowed ref to st_top->st_symbols */
- int st_nblocks; /* number of blocks used */
+ PyObject *st_global; /* borrowed ref to st_top->ste_symbols */
+ int st_nblocks; /* number of blocks used. kept for
+ consistency with the corresponding
+ compiler structure */
PyObject *st_private; /* name of current class or NULL */
- PyFutureFeatures *st_future; /* module's future features */
+ PyFutureFeatures *st_future; /* module's future features that affect
+ the symbol table */
};
typedef struct _symtable_entry {
@@ -34,7 +37,7 @@ typedef struct _symtable_entry {
PyObject *ste_id; /* int: key in ste_table->st_blocks */
PyObject *ste_symbols; /* dict: variable names to flags */
PyObject *ste_name; /* string: name of current block */
- PyObject *ste_varnames; /* list of variable names */
+ PyObject *ste_varnames; /* list of function parameters */
PyObject *ste_children; /* list of child blocks */
_Py_block_ty ste_type; /* module, class, or function */
int ste_unoptimized; /* false if namespace is optimized */
diff --git a/Include/timefuncs.h b/Include/timefuncs.h
deleted file mode 100644
index 3c43575..0000000
--- a/Include/timefuncs.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* timefuncs.h
- */
-
-/* Utility function related to timemodule.c. */
-
-#ifndef TIMEFUNCS_H
-#define TIMEFUNCS_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* Cast double x to time_t, but raise ValueError if x is too large
- * to fit in a time_t. ValueError is set on return iff the return
- * value is (time_t)-1 and PyErr_Occurred().
- */
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(time_t) _PyTime_DoubleToTimet(double x);
-#endif
-
-
-#ifdef __cplusplus
-}
-#endif
-#endif /* TIMEFUNCS_H */
diff --git a/Include/traceback.h b/Include/traceback.h
index 69e3d05..7734707 100644
--- a/Include/traceback.h
+++ b/Include/traceback.h
@@ -5,6 +5,8 @@
extern "C" {
#endif
+#include "pystate.h"
+
struct _frame;
/* Traceback interface */
@@ -28,6 +30,42 @@ PyAPI_FUNC(int) _Py_DisplaySourceLine(PyObject *, PyObject *, int, int);
PyAPI_DATA(PyTypeObject) PyTraceBack_Type;
#define PyTraceBack_Check(v) (Py_TYPE(v) == &PyTraceBack_Type)
+/* Write the Python traceback into the file 'fd'. For example:
+
+ Traceback (most recent call first):
+ File "xxx", line xxx in <xxx>
+ File "xxx", line xxx in <xxx>
+ ...
+ File "xxx", line xxx in <xxx>
+
+ This function is written for debugging purposes only, to dump the traceback
+ in the worst case: after a segmentation fault, at a fatal error, etc. That is
+ why it is very limited. Strings are truncated to 100 characters and encoded
+ to ASCII with backslashreplace. It doesn't write the source code, only the
+ function name, filename and line number of each frame. Only the first 100
+ frames are written: if the traceback is truncated, the line " ..." is written.
+
+ This function is signal safe. */
+
+PyAPI_FUNC(void) _Py_DumpTraceback(  /* function, so PyAPI_FUNC not PyAPI_DATA */
+ int fd,
+ PyThreadState *tstate);
+
+/* Write the traceback of all threads into the file 'fd'. current_thread can be
+ NULL. Return NULL on success, or an error message on error.
+
+ This function is written for debugging purposes only. It calls
+ _Py_DumpTraceback() for each thread, and so has the same limitations. It
+ only writes the traceback of the first 100 threads, and writes "..." if
+ there are more threads.
+
+ This function is signal safe. */
+
+PyAPI_FUNC(const char*) _Py_DumpTracebackThreads(  /* function: PyAPI_FUNC */
+ int fd, PyInterpreterState *interp,
+ PyThreadState *current_thread);
+
+
#ifdef __cplusplus
}
#endif
diff --git a/Include/tupleobject.h b/Include/tupleobject.h
index f17b788..c273ce7 100644
--- a/Include/tupleobject.h
+++ b/Include/tupleobject.h
@@ -63,6 +63,9 @@ PyAPI_FUNC(void) _PyTuple_MaybeUntrack(PyObject *);
#endif
PyAPI_FUNC(int) PyTuple_ClearFreeList(void);
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(void) _PyTuple_DebugMallocStats(FILE *out);
+#endif /* Py_LIMITED_API */
#ifdef __cplusplus
}
diff --git a/Include/ucnhash.h b/Include/ucnhash.h
index 70fdf13..8de9ba0 100644
--- a/Include/ucnhash.h
+++ b/Include/ucnhash.h
@@ -19,11 +19,13 @@ typedef struct {
success, zero if not. Does not set Python exceptions.
If self is NULL, data come from the default version of the database.
If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */
- int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen);
+ int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen,
+ int with_alias_and_seq);
/* Get character code for a given name. Same error handling
as for getname. */
- int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code);
+ int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code,
+ int with_named_seq);
} _PyUnicode_Name_CAPI;
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 379a90c..a8f5b5d 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -64,16 +64,15 @@ Copyright (c) Corporation for National Research Initiatives.
/* Python 3.x requires unicode */
#define Py_USING_UNICODE
-/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
- properly set, but the default rules below doesn't set it. I'll
- sort this out some other day -- fredrik@pythonware.com */
-
-#ifndef Py_UNICODE_SIZE
-#error Must define Py_UNICODE_SIZE
+#ifndef SIZEOF_WCHAR_T
+#error Must define SIZEOF_WCHAR_T
#endif
-/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
- strings are stored as UCS-2 (with limited support for UTF-16) */
+#define Py_UNICODE_SIZE SIZEOF_WCHAR_T
+
+/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
+ Otherwise, Unicode strings are stored as UCS-2 (with limited support
+ for UTF-16) */
#if Py_UNICODE_SIZE >= 4
#define Py_UNICODE_WIDE
@@ -84,19 +83,14 @@ Copyright (c) Corporation for National Research Initiatives.
/* #define HAVE_WCHAR_H */
/* #define HAVE_USABLE_WCHAR_T */
-/* Defaults for various platforms */
-#ifndef PY_UNICODE_TYPE
-
-/* Windows has a usable wchar_t type (unless we're using UCS-4) */
-# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
-# define HAVE_USABLE_WCHAR_T
-# define PY_UNICODE_TYPE wchar_t
-# endif
-
-# if defined(Py_UNICODE_WIDE)
-# define PY_UNICODE_TYPE Py_UCS4
-# endif
+/* Py_UNICODE was the native Unicode storage format (code unit) used by
+ Python and represents a single Unicode element in the Unicode type.
+ With PEP 393, Py_UNICODE is deprecated and replaced with a
+ typedef to wchar_t. */
+#ifndef Py_LIMITED_API
+#define PY_UNICODE_TYPE wchar_t
+typedef wchar_t Py_UNICODE;
#endif
/* If the compiler provides a wchar_t type we try to support it
@@ -109,6 +103,10 @@ Copyright (c) Corporation for National Research Initiatives.
# endif
#endif
+#if defined(MS_WINDOWS)
+# define HAVE_MBCS
+#endif
+
#ifdef HAVE_WCHAR_H
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
# ifdef _HAVE_BSDI
@@ -117,201 +115,24 @@ Copyright (c) Corporation for National Research Initiatives.
# include <wchar.h>
#endif
-/*
- * Use this typedef when you need to represent a UTF-16 surrogate pair
- * as single unsigned integer.
- */
-#if SIZEOF_INT >= 4
+/* Py_UCS4 and Py_UCS2 are typedefs for the respective
+ unicode representations. */
+#if SIZEOF_INT == 4
typedef unsigned int Py_UCS4;
-#elif SIZEOF_LONG >= 4
+#elif SIZEOF_LONG == 4
typedef unsigned long Py_UCS4;
+#else
+#error "Could not find a proper typedef for Py_UCS4"
#endif
-/* Py_UNICODE is the native Unicode storage format (code unit) used by
- Python and represents a single Unicode element in the Unicode
- type. */
-
-#ifndef Py_LIMITED_API
-typedef PY_UNICODE_TYPE Py_UNICODE;
-#endif
-
-/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
-
-/* Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
- produce different external names and thus cause import errors in
- case Python interpreters and extensions with mixed compiled in
- Unicode width assumptions are combined. */
-
-#ifndef Py_UNICODE_WIDE
-
-# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
-# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
-# define PyUnicode_AsDecodedObject PyUnicodeUCS2_AsDecodedObject
-# define PyUnicode_AsDecodedUnicode PyUnicodeUCS2_AsDecodedUnicode
-# define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
-# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
-# define PyUnicode_AsEncodedUnicode PyUnicodeUCS2_AsEncodedUnicode
-# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
-# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
-# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
-# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
-# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
-# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
-# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
-# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
-# define PyUnicode_AsWideCharString PyUnicodeUCS2_AsWideCharString
-# define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
-# define PyUnicode_Compare PyUnicodeUCS2_Compare
-# define PyUnicode_CompareWithASCIIString PyUnicodeUCS2_CompareWithASCIIString
-# define PyUnicode_Concat PyUnicodeUCS2_Concat
-# define PyUnicode_Append PyUnicodeUCS2_Append
-# define PyUnicode_AppendAndDel PyUnicodeUCS2_AppendAndDel
-# define PyUnicode_Contains PyUnicodeUCS2_Contains
-# define PyUnicode_Count PyUnicodeUCS2_Count
-# define PyUnicode_Decode PyUnicodeUCS2_Decode
-# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
-# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
-# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
-# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
-# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS2_DecodeFSDefaultAndSize
-# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
-# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
-# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
-# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
-# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
-# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
-# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
-# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
-# define PyUnicode_Encode PyUnicodeUCS2_Encode
-# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
-# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
-# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
-# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
-# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
-# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
-# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
-# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
-# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
-# define PyUnicode_Find PyUnicodeUCS2_Find
-# define PyUnicode_Format PyUnicodeUCS2_Format
-# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
-# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
-# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
-# define PyUnicode_FromObject PyUnicodeUCS2_FromObject
-# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
-# define PyUnicode_FromString PyUnicodeUCS2_FromString
-# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
-# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
-# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
-# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
-# define PyUnicode_FSDecoder PyUnicodeUCS2_FSDecoder
-# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
-# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
-# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
-# define PyUnicode_IsIdentifier PyUnicodeUCS2_IsIdentifier
-# define PyUnicode_Join PyUnicodeUCS2_Join
-# define PyUnicode_Partition PyUnicodeUCS2_Partition
-# define PyUnicode_RPartition PyUnicodeUCS2_RPartition
-# define PyUnicode_RSplit PyUnicodeUCS2_RSplit
-# define PyUnicode_Replace PyUnicodeUCS2_Replace
-# define PyUnicode_Resize PyUnicodeUCS2_Resize
-# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
-# define PyUnicode_Split PyUnicodeUCS2_Split
-# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
-# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
-# define PyUnicode_Translate PyUnicodeUCS2_Translate
-# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
-# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
-# define _PyUnicode_Fini _PyUnicodeUCS2_Fini
-# define _PyUnicode_Init _PyUnicodeUCS2_Init
-# define PyUnicode_strdup PyUnicodeUCS2_strdup
-
+#if SIZEOF_SHORT == 2
+typedef unsigned short Py_UCS2;
#else
-
-# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
-# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
-# define PyUnicode_AsDecodedObject PyUnicodeUCS4_AsDecodedObject
-# define PyUnicode_AsDecodedUnicode PyUnicodeUCS4_AsDecodedUnicode
-# define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
-# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
-# define PyUnicode_AsEncodedUnicode PyUnicodeUCS4_AsEncodedUnicode
-# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
-# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
-# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
-# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
-# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
-# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
-# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
-# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
-# define PyUnicode_AsWideCharString PyUnicodeUCS4_AsWideCharString
-# define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
-# define PyUnicode_Compare PyUnicodeUCS4_Compare
-# define PyUnicode_CompareWithASCIIString PyUnicodeUCS4_CompareWithASCIIString
-# define PyUnicode_Concat PyUnicodeUCS4_Concat
-# define PyUnicode_Append PyUnicodeUCS4_Append
-# define PyUnicode_AppendAndDel PyUnicodeUCS4_AppendAndDel
-# define PyUnicode_Contains PyUnicodeUCS4_Contains
-# define PyUnicode_Count PyUnicodeUCS4_Count
-# define PyUnicode_Decode PyUnicodeUCS4_Decode
-# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
-# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
-# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
-# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
-# define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS4_DecodeFSDefaultAndSize
-# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
-# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
-# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
-# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
-# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
-# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
-# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
-# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
-# define PyUnicode_Encode PyUnicodeUCS4_Encode
-# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
-# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
-# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
-# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
-# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
-# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
-# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
-# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
-# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
-# define PyUnicode_Find PyUnicodeUCS4_Find
-# define PyUnicode_Format PyUnicodeUCS4_Format
-# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
-# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
-# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
-# define PyUnicode_FromObject PyUnicodeUCS4_FromObject
-# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
-# define PyUnicode_FromString PyUnicodeUCS4_FromString
-# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
-# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
-# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
-# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
-# define PyUnicode_FSDecoder PyUnicodeUCS4_FSDecoder
-# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
-# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
-# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
-# define PyUnicode_IsIdentifier PyUnicodeUCS4_IsIdentifier
-# define PyUnicode_Join PyUnicodeUCS4_Join
-# define PyUnicode_Partition PyUnicodeUCS4_Partition
-# define PyUnicode_RPartition PyUnicodeUCS4_RPartition
-# define PyUnicode_RSplit PyUnicodeUCS4_RSplit
-# define PyUnicode_Replace PyUnicodeUCS4_Replace
-# define PyUnicode_Resize PyUnicodeUCS4_Resize
-# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
-# define PyUnicode_Split PyUnicodeUCS4_Split
-# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
-# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
-# define PyUnicode_Translate PyUnicodeUCS4_Translate
-# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
-# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
-# define _PyUnicode_Fini _PyUnicodeUCS4_Fini
-# define _PyUnicode_Init _PyUnicodeUCS4_Init
-# define PyUnicode_strdup PyUnicodeUCS4_strdup
-
+#error "Could not find a proper typedef for Py_UCS2"
#endif
+typedef unsigned char Py_UCS1;
+
/* --- Internal Unicode Operations ---------------------------------------- */
/* Since splitting on whitespace is an important use case, and
@@ -350,7 +171,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
Py_UNICODE_ISDIGIT(ch) || \
Py_UNICODE_ISNUMERIC(ch))
-#define Py_UNICODE_COPY(target, source, length) \
+#define Py_UNICODE_COPY(target, source, length) \
Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
#define Py_UNICODE_FILL(target, value, length) \
@@ -358,13 +179,27 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
} while (0)
+/* macros to work with surrogates; ch may be any expression, evaluated twice */
+#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
+#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
+#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
+/* Join two surrogate characters and return a single Py_UCS4 value. */
+#define Py_UNICODE_JOIN_SURROGATES(high, low) \
+ (((((Py_UCS4)(high) & 0x03FF) << 10) | \
+ ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
+/* high surrogate = top 10 bits added to D800 */
+#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
+/* low surrogate = bottom 10 bits added to DC00 */
+#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
+
/* Check if substring matches at given offset. The offset must be
valid, and the substring must not be empty. */
#define Py_UNICODE_MATCH(string, offset, substring) \
- ((*((string)->str + (offset)) == *((substring)->str)) && \
- ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
- !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
+ ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
+ ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
+ !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
+
#endif /* Py_LIMITED_API */
#ifdef __cplusplus
@@ -374,41 +209,374 @@ extern "C" {
/* --- Unicode Type ------------------------------------------------------- */
#ifndef Py_LIMITED_API
+
+/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
+ structure. state.ascii and state.compact are set, and the data
+ immediately follow the structure. utf8_length and wstr_length can be found
+ in the length field; the utf8 pointer is equal to the data pointer. */
typedef struct {
+ /* There are 4 forms of Unicode strings:
+
+ - compact ascii:
+
+ * structure = PyASCIIObject
+ * test: PyUnicode_IS_COMPACT_ASCII(op)
+ * kind = PyUnicode_1BYTE_KIND
+ * compact = 1
+ * ascii = 1
+ * ready = 1
+ * (length is the length of the utf8 and wstr strings)
+ * (data starts just after the structure)
+ * (since ASCII is decoded from UTF-8, the utf8 string is the data)
+
+ - compact:
+
+ * structure = PyCompactUnicodeObject
+ * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
+ * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
+ PyUnicode_4BYTE_KIND
+ * compact = 1
+ * ready = 1
+ * ascii = 0
+ * utf8 is not shared with data
+ * utf8_length = 0 if utf8 is NULL
+ * wstr is shared with data and wstr_length=length
+ if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
+ or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
+ * wstr_length = 0 if wstr is NULL
+ * (data starts just after the structure)
+
+ - legacy string, not ready:
+
+ * structure = PyUnicodeObject
+ * test: kind == PyUnicode_WCHAR_KIND
+ * length = 0 (use wstr_length)
+ * hash = -1
+ * kind = PyUnicode_WCHAR_KIND
+ * compact = 0
+ * ascii = 0
+ * ready = 0
+ * interned = SSTATE_NOT_INTERNED
+ * wstr is not NULL
+ * data.any is NULL
+ * utf8 is NULL
+ * utf8_length = 0
+
+ - legacy string, ready:
+
+ * structure = PyUnicodeObject structure
+ * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
+ * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
+ PyUnicode_4BYTE_KIND
+ * compact = 0
+ * ready = 1
+ * data.any is not NULL
+ * utf8 is shared and utf8_length = length with data.any if ascii = 1
+ * utf8_length = 0 if utf8 is NULL
+ * wstr is shared with data.any and wstr_length = length
+ if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
+ or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
+ * wstr_length = 0 if wstr is NULL
+
+ Compact strings use only one memory block (structure + characters),
+ whereas legacy strings use one block for the structure and one block
+ for characters.
+
+ Legacy strings are created by PyUnicode_FromUnicode() and
+ PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
+ when PyUnicode_READY() is called.
+
+ See also _PyUnicode_CheckConsistency().
+ */
PyObject_HEAD
- Py_ssize_t length; /* Length of raw Unicode data in buffer */
- Py_UNICODE *str; /* Raw Unicode buffer */
+ Py_ssize_t length; /* Number of code points in the string */
Py_hash_t hash; /* Hash value; -1 if not set */
- int state; /* != 0 if interned. In this case the two
- * references from the dictionary to this object
- * are *not* counted in ob_refcnt. */
- PyObject *defenc; /* (Default) Encoded version as Python
- string, or NULL; this is used for
- implementing the buffer protocol */
+ struct {
+ /*
+ SSTATE_NOT_INTERNED (0)
+ SSTATE_INTERNED_MORTAL (1)
+ SSTATE_INTERNED_IMMORTAL (2)
+
+ If interned != SSTATE_NOT_INTERNED, the two references from the
+ dictionary to this object are *not* counted in ob_refcnt.
+ */
+ unsigned int interned:2;
+ /* Character size:
+
+ - PyUnicode_WCHAR_KIND (0):
+
+ * character type = wchar_t (16 or 32 bits, depending on the
+ platform)
+
+ - PyUnicode_1BYTE_KIND (1):
+
+ * character type = Py_UCS1 (8 bits, unsigned)
+ * all characters are in the range U+0000-U+00FF (latin1)
+ * if ascii is set, all characters are in the range U+0000-U+007F
+ (ASCII), otherwise at least one character is in the range
+ U+0080-U+00FF
+
+ - PyUnicode_2BYTE_KIND (2):
+
+ * character type = Py_UCS2 (16 bits, unsigned)
+ * all characters are in the range U+0000-U+FFFF (BMP)
+ * at least one character is in the range U+0100-U+FFFF
+
+ - PyUnicode_4BYTE_KIND (4):
+
+ * character type = Py_UCS4 (32 bits, unsigned)
+ * all characters are in the range U+0000-U+10FFFF
+ * at least one character is in the range U+10000-U+10FFFF
+ */
+ unsigned int kind:3;
+ /* Compact is with respect to the allocation scheme. Compact unicode
+ objects only require one memory block while non-compact objects use
+ one block for the PyUnicodeObject struct and another for its data
+ buffer. */
+ unsigned int compact:1;
+ /* The string only contains characters in the range U+0000-U+007F (ASCII)
+ and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
+ set, use the PyASCIIObject structure. */
+ unsigned int ascii:1;
+ /* The ready flag indicates whether the object layout is initialized
+ completely. This means that this is either a compact object, or
+ the data pointer is filled out. The bit is redundant, and helps
+ to minimize the test in PyUnicode_IS_READY(). */
+ unsigned int ready:1;
+ } state;
+ wchar_t *wstr; /* wchar_t representation (null-terminated) */
+} PyASCIIObject;
+
+/* Non-ASCII strings allocated through PyUnicode_New use the
+ PyCompactUnicodeObject structure. state.compact is set, and the data
+ immediately follow the structure. */
+typedef struct {
+ PyASCIIObject _base;
+ Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
+ * terminating \0. */
+ char *utf8; /* UTF-8 representation (null-terminated) */
+ Py_ssize_t wstr_length; /* Number of code points in wstr, possible
+ * surrogates count as two code points. */
+} PyCompactUnicodeObject;
+
+/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
+ PyUnicodeObject structure. The actual string data is initially in the wstr
+ block, and copied into the data block using _PyUnicode_Ready. */
+typedef struct {
+ PyCompactUnicodeObject _base;
+ union {
+ void *any;
+ Py_UCS1 *latin1;
+ Py_UCS2 *ucs2;
+ Py_UCS4 *ucs4;
+ } data; /* Canonical, smallest-form Unicode buffer */
} PyUnicodeObject;
#endif
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
-#define SSTATE_NOT_INTERNED 0
-#define SSTATE_INTERNED_MORTAL 1
-#define SSTATE_INTERNED_IMMORTAL 2
-
#define PyUnicode_Check(op) \
PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
/* Fast access macros */
#ifndef Py_LIMITED_API
-#define PyUnicode_GET_SIZE(op) \
- (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length))
+/* wstr length: in the length field for compact ASCII, else in wstr_length. */
+#define PyUnicode_WSTR_LENGTH(op) \
+ (PyUnicode_IS_COMPACT_ASCII(op) ? \
+ ((PyASCIIObject*)(op))->length : \
+ ((PyCompactUnicodeObject*)(op))->wstr_length)
+
+/* Returns the deprecated Py_UNICODE representation's size in code units
+ (this includes surrogate pairs as 2 units).
+ If the Py_UNICODE representation is not available, it will be computed
+ on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
+
+#define PyUnicode_GET_SIZE(op) \
+ (assert(PyUnicode_Check(op)), \
+ (((PyASCIIObject *)(op))->wstr) ? \
+ PyUnicode_WSTR_LENGTH(op) : \
+ ((void)PyUnicode_AsUnicode((PyObject *)(op)), \
+ assert(((PyASCIIObject *)(op))->wstr), \
+ PyUnicode_WSTR_LENGTH(op)))
+
#define PyUnicode_GET_DATA_SIZE(op) \
- (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)))
+ (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
+
+/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
+ representation on demand. Using this macro is very inefficient now,
+ try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
+ use PyUnicode_WRITE() and PyUnicode_READ(). */
+
#define PyUnicode_AS_UNICODE(op) \
- (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str))
+ (assert(PyUnicode_Check(op)), \
+ (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
+ PyUnicode_AsUnicode((PyObject *)(op)))
+
#define PyUnicode_AS_DATA(op) \
- (assert(PyUnicode_Check(op)),((const char *)((PyUnicodeObject *)(op))->str))
+ ((const char *)(PyUnicode_AS_UNICODE(op)))
+
+
+/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
+
+/* Values for PyASCIIObject.state: */
+
+/* Interning state. */
+#define SSTATE_NOT_INTERNED 0
+#define SSTATE_INTERNED_MORTAL 1
+#define SSTATE_INTERNED_IMMORTAL 2
+
+/* Return true if the string contains only ASCII characters, or 0 if not. The
+ string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
+ ready. */
+#define PyUnicode_IS_ASCII(op) \
+ (assert(PyUnicode_Check(op)), \
+ assert(PyUnicode_IS_READY(op)), \
+ ((PyASCIIObject*)(op))->state.ascii)
+
+/* Return true if the string is compact or 0 if not.
+ No type checks or Ready calls are performed. */
+#define PyUnicode_IS_COMPACT(op) \
+ (((PyASCIIObject*)(op))->state.compact)
+
+/* Return true if the string is a compact ASCII string (use PyASCIIObject
+ structure), or 0 if not. No type checks or Ready calls are performed. */
+#define PyUnicode_IS_COMPACT_ASCII(op) \
+ (((PyASCIIObject*)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
+
+enum PyUnicode_Kind {
+/* String contains only wstr byte characters. This is only possible
+ when the string was created with a legacy API and _PyUnicode_Ready()
+ has not been called yet. */
+ PyUnicode_WCHAR_KIND = 0,
+/* Return values of the PyUnicode_KIND() macro: */
+ PyUnicode_1BYTE_KIND = 1,
+ PyUnicode_2BYTE_KIND = 2,
+ PyUnicode_4BYTE_KIND = 4
+};
+
+/* Return pointers to the canonical representation cast to unsigned char,
+ Py_UCS2, or Py_UCS4 for direct character access.
+ No checks are performed, use PyUnicode_KIND() before to ensure
+ these will work correctly. */
+
+#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
+#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
+#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
+
+/* Return one of the PyUnicode_*_KIND values defined above. */
+#define PyUnicode_KIND(op) \
+ (assert(PyUnicode_Check(op)), \
+ assert(PyUnicode_IS_READY(op)), \
+ ((PyASCIIObject *)(op))->state.kind)
+
+/* Return a void pointer to the raw unicode buffer. */
+#define _PyUnicode_COMPACT_DATA(op) \
+ (PyUnicode_IS_ASCII(op) ? \
+ ((void*)((PyASCIIObject*)(op) + 1)) : \
+ ((void*)((PyCompactUnicodeObject*)(op) + 1)))
+
+#define _PyUnicode_NONCOMPACT_DATA(op) \
+ (assert(((PyUnicodeObject*)(op))->data.any), \
+ ((((PyUnicodeObject *)(op))->data.any)))
+
+#define PyUnicode_DATA(op) \
+ (assert(PyUnicode_Check(op)), \
+ PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
+ _PyUnicode_NONCOMPACT_DATA(op))
+
+/* In the access macros below, "kind" may be evaluated more than once.
+ All other macro parameters are evaluated exactly once, so it is safe
+ to put side effects into them (such as increasing the index). */
+
+/* Write into the canonical representation, this macro does not do any sanity
+ checks and is intended for usage in loops. The caller should cache the
+ kind and data pointers obtained from other macro calls.
+ index is the index in the string (starts at 0) and value is the new
+ code point value which should be written to that location. */
+#define PyUnicode_WRITE(kind, data, index, value) \
+ do { \
+ switch ((kind)) { \
+ case PyUnicode_1BYTE_KIND: { \
+ ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
+ break; \
+ } \
+ case PyUnicode_2BYTE_KIND: { \
+ ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
+ break; \
+ } \
+ default: { \
+ assert((kind) == PyUnicode_4BYTE_KIND); \
+ ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
+ } \
+ } \
+ } while (0)
+
+/* Read a code point from the string's canonical representation. No checks
+ or ready calls are performed. */
+#define PyUnicode_READ(kind, data, index) \
+ ((Py_UCS4) \
+ ((kind) == PyUnicode_1BYTE_KIND ? \
+ ((const Py_UCS1 *)(data))[(index)] : \
+ ((kind) == PyUnicode_2BYTE_KIND ? \
+ ((const Py_UCS2 *)(data))[(index)] : \
+ ((const Py_UCS4 *)(data))[(index)] \
+ ) \
+ ))
+
+/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
+ calls PyUnicode_KIND() and might call it twice. For single reads, use
+ PyUnicode_READ_CHAR, for multiple consecutive reads callers should
+ cache kind and use PyUnicode_READ instead. */
+#define PyUnicode_READ_CHAR(unicode, index) \
+ (assert(PyUnicode_Check(unicode)), \
+ assert(PyUnicode_IS_READY(unicode)), \
+ (Py_UCS4) \
+ (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
+ ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
+ (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
+ ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
+ ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
+ ) \
+ ))
+
+/* Returns the length of the unicode string. The caller has to make sure that
+ the string has its canonical representation set before calling
+ this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
+#define PyUnicode_GET_LENGTH(op) \
+ (assert(PyUnicode_Check(op)), \
+ assert(PyUnicode_IS_READY(op)), \
+ ((PyASCIIObject *)(op))->length)
+
+
+/* Fast check to determine whether an object is ready. Equivalent to
+ (PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */
+
+#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)
+
+/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
+ case. If the canonical representation is not yet set, it will still call
+ _PyUnicode_Ready().
+ Returns 0 on success and -1 on errors. */
+#define PyUnicode_READY(op) \
+ (assert(PyUnicode_Check(op)), \
+ (PyUnicode_IS_READY(op) ? \
+ 0 : _PyUnicode_Ready((PyObject *)(op))))
+
+/* Return a maximum character value which is suitable for creating another
+ string based on op. This is always an approximation but more efficient
+ than iterating over the string. */
+#define PyUnicode_MAX_CHAR_VALUE(op) \
+ (assert(PyUnicode_IS_READY(op)), \
+ (PyUnicode_IS_ASCII(op) ? \
+ (0x7f) : \
+ (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \
+ (0xffU) : \
+ (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \
+ (0xffffU) : \
+ (0x10ffffU)))))
+
#endif
/* --- Constants ---------------------------------------------------------- */
@@ -418,12 +586,107 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
Unicode 3.0. */
-#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
+#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
/* === Public API ========================================================= */
/* --- Plain Py_UNICODE --------------------------------------------------- */
+/* With PEP 393, this is the recommended way to allocate a new unicode object.
+ This function will allocate the object and its buffer in a single memory
+ block. Objects created using this function are not resizable. */
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject*) PyUnicode_New(
+ Py_ssize_t size, /* Number of code points in the new string */
+ Py_UCS4 maxchar /* maximum code point value in the string */
+ );
+#endif
+
+/* Initializes the canonical string representation from the deprecated
+ wstr/Py_UNICODE representation. This function is used to convert Unicode
+ objects which were created using the old API to the new flexible format
+ introduced with PEP 393.
+
+ Don't call this function directly, use the public PyUnicode_READY() macro
+ instead. */
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(int) _PyUnicode_Ready(
+ PyObject *unicode /* Unicode object */
+ );
+#endif
+
+/* Get a copy of a Unicode string. */
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
+ PyObject *unicode
+ );
+#endif
+
+/* Copy characters from one unicode object into another, this function performs
+ character conversion when necessary and falls back to memcpy() if possible.
+
+ Fail if to is too small (smaller than *how_many* or smaller than
+ len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
+ kind(to), or if *to* has more than 1 reference.
+
+ Return the number of written characters, or return -1 and raise an exception
+ on error.
+
+ Pseudo-code:
+
+ how_many = min(how_many, len(from) - from_start)
+ to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
+ return how_many
+
+ Note: The function doesn't write a terminating null character.
+ */
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
+ PyObject *to,
+ Py_ssize_t to_start,
+ PyObject *from,
+ Py_ssize_t from_start,
+ Py_ssize_t how_many
+ );
+
+/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
+ may crash if parameters are invalid (e.g. if the output string
+ is too short). */
+PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
+ PyObject *to,
+ Py_ssize_t to_start,
+ PyObject *from,
+ Py_ssize_t from_start,
+ Py_ssize_t how_many
+ );
+#endif
+
+#ifndef Py_LIMITED_API
+/* Fill a string with a character: write fill_char into
+ unicode[start:start+length].
+
+ Fail if fill_char is bigger than the string maximum character, or if the
+ string has more than 1 reference.
+
+ Return the number of written characters, or return -1 and raise an exception
+ on error. */
+PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
+ PyObject *unicode,
+ Py_ssize_t start,
+ Py_ssize_t length,
+ Py_UCS4 fill_char
+ );
+
+/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
+ if parameters are invalid (e.g. if length is longer than the string). */
+PyAPI_FUNC(void) _PyUnicode_FastFill(
+ PyObject *unicode,
+ Py_ssize_t start,
+ Py_ssize_t length,
+ Py_UCS4 fill_char
+ );
+#endif
+
/* Create a Unicode Object from the Py_UNICODE buffer u of the given
size.
@@ -448,13 +711,60 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
);
/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
- UTF-8 encoded bytes */
+ UTF-8 encoded bytes. The size is determined with strlen(). */
PyAPI_FUNC(PyObject*) PyUnicode_FromString(
const char *u /* UTF-8 encoded string */
);
+#ifndef Py_LIMITED_API
+/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
+ Scan the string to find the maximum character. */
+PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
+ int kind,
+ const void *buffer,
+ Py_ssize_t size);
+
+/* Create a new string from a buffer of ASCII characters.
+ WARNING: Don't check if the string contains any non-ASCII character. */
+PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
+ const char *buffer,
+ Py_ssize_t size);
+#endif
+
+PyAPI_FUNC(PyObject*) PyUnicode_Substring(
+ PyObject *str,
+ Py_ssize_t start,
+ Py_ssize_t end);
+
+#ifndef Py_LIMITED_API
+/* Compute the maximum character of the substring unicode[start:end].
+ Return 127 for an empty string. */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
+ PyObject *unicode,
+ Py_ssize_t start,
+ Py_ssize_t end);
+#endif
+
+/* Copy the string into a UCS4 buffer including the null character if copy_null
+ is set. Return NULL and raise an exception on error. Raise a ValueError if
+ the buffer is smaller than the string. Return buffer on success.
+
+ buflen is the length of the buffer in (Py_UCS4) characters. */
+PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
+ PyObject *unicode,
+ Py_UCS4* buffer,
+ Py_ssize_t buflen,
+ int copy_null);
+
+/* Copy the string into a UCS4 buffer. A new buffer is allocated using
+ * PyMem_Malloc; if this fails, NULL is returned with a memory error
+ exception set. */
+PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
+
/* Return a read-only pointer to the Unicode object's internal
- Py_UNICODE buffer. */
+ Py_UNICODE buffer.
+ If the wchar_t/Py_UNICODE representation is not yet available, this
+ function will calculate it. */
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
@@ -462,30 +772,69 @@ PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
);
#endif
+/* Return a read-only pointer to the Unicode object's internal
+ Py_UNICODE buffer and save the length at size.
+ If the wchar_t/Py_UNICODE representation is not yet available, this
+ function will calculate it. */
+
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
+ PyObject *unicode, /* Unicode object */
+ Py_ssize_t *size /* location where to save the length */
+ );
+#endif
+
/* Get the length of the Unicode object. */
+PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
+ PyObject *unicode
+);
+
+/* Get the number of Py_UNICODE units in the
+ string representation. */
+
PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
PyObject *unicode /* Unicode object */
);
+/* Read a character from the string. */
+
+PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
+ PyObject *unicode,
+ Py_ssize_t index
+ );
+
+/* Write a character to the string. The string must have been created through
+ PyUnicode_New, must not be shared, and must not have been hashed yet.
+
+ Return 0 on success, -1 on error. */
+
+PyAPI_FUNC(int) PyUnicode_WriteChar(
+ PyObject *unicode,
+ Py_ssize_t index,
+ Py_UCS4 character
+ );
+
#ifndef Py_LIMITED_API
/* Get the maximum ordinal for a Unicode character. */
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
#endif
-/* Resize an already allocated Unicode object to the new size length.
+/* Resize a Unicode object. The length is the number of characters, except
+ if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
+ is the number of Py_UNICODE characters.
*unicode is modified to point to the new (resized) object and 0
returned on success.
- This API may only be called by the function which also called the
- Unicode constructor. The refcount on the object must be 1. Otherwise,
- an error is returned.
+ Try to resize the string in place (which is usually faster than allocating
+ a new string and copy characters), or create a new string.
Error handling is implemented as follows: an exception is set, -1
is returned and *unicode left untouched.
-*/
+ WARNING: The function doesn't check string content, the result may not be a
+ string in canonical representation. */
PyAPI_FUNC(int) PyUnicode_Resize(
PyObject **unicode, /* Pointer to the Unicode object */
@@ -542,11 +891,67 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
);
#ifndef Py_LIMITED_API
+typedef struct {
+ PyObject *buffer;
+ void *data;
+ enum PyUnicode_Kind kind;
+ Py_UCS4 maxchar;
+ Py_ssize_t size;
+ Py_ssize_t pos;
+ /* minimum length of the buffer when overallocation is enabled,
+ see _PyUnicodeWriter_Init() */
+ Py_ssize_t min_length;
+ unsigned char overallocate;
+ /* If readonly is 1, buffer is a shared string (cannot be modified)
+ and size is set to 0. */
+ unsigned char readonly;
+} _PyUnicodeWriter ;
+
+/* Initialize a Unicode writer.
+
+ If min_length is greater than zero, _PyUnicodeWriter_Prepare()
+ overallocates the buffer and min_length is the minimum length in characters
+ of the buffer. */
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length);
+
+/* Prepare the buffer to write 'length' characters
+ with the specified maximum character.
+
+ Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
+ (((MAXCHAR) <= (WRITER)->maxchar \
+ && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
+ ? 0 \
+ : (((LENGTH) == 0) \
+ ? 0 \
+ : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
+ instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
+ Py_ssize_t length, Py_UCS4 maxchar);
+
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str);
+
+PyAPI_FUNC(PyObject *)
+_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
+
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
+#endif
+
+#ifndef Py_LIMITED_API
/* Format the object based on the format_spec, as defined in PEP 3101
(Advanced String Formatting). */
-PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
- Py_UNICODE *format_spec,
- Py_ssize_t format_spec_len);
+PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ PyObject *format_spec,
+ Py_ssize_t start,
+ Py_ssize_t end);
#endif
PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
@@ -559,7 +964,8 @@ PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
#endif
/* Use only if you know it's a string */
-#define PyUnicode_CHECK_INTERNED(op) (((PyUnicodeObject *)(op))->state)
+#define PyUnicode_CHECK_INTERNED(op) \
+ (((PyASCIIObject *)(op))->state.interned)
/* --- wchar_t support for platforms which support it --------------------- */
@@ -606,14 +1012,17 @@ PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
Py_ssize_t *size /* number of characters of the result */
);
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
+#endif
+
#endif
/* --- Unicode ordinals --------------------------------------------------- */
/* Create a Unicode Object from the given Unicode code point ordinal.
- The ordinal must be in range(0x10000) on narrow Python builds
- (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
+ The ordinal must be in range(0x110000). A ValueError is
raised in case it is not.
*/
@@ -651,50 +1060,42 @@ PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
/* --- Manage the default encoding ---------------------------------------- */
-/* Return a Python string holding the default encoded value of the
- Unicode object.
-
- Same as PyUnicode_AsUTF8String() except
- the resulting string is cached in the Unicode object for subsequent
- usage by this function. The cached version is needed to implement
- the character buffer interface and will live (at least) as long as
- the Unicode object itself.
-
- The refcount of the string is *not* incremented.
-
- *** Exported for internal use by the interpreter only !!! ***
-
-*/
-
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
- PyObject *unicode,
- const char *errors);
-#endif
-
/* Returns a pointer to the default encoding (UTF-8) of the
Unicode object unicode and the size of the encoded representation
in bytes stored in *size.
In case of an error, no *size is set.
+ This function caches the UTF-8 encoded string in the unicodeobject
+ and subsequent calls will return the same string. The memory is released
+ when the unicodeobject is deallocated.
+
+ _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
+ support the previous internal function with the same behaviour.
+
*** This API is for interpreter INTERNAL USE ONLY and will likely
*** be removed or changed in the future.
*** If you need to access the Unicode object as UTF-8 bytes string,
*** please use PyUnicode_AsUTF8String() instead.
-
*/
#ifndef Py_LIMITED_API
-PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
+PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
PyObject *unicode,
Py_ssize_t *size);
+#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
#endif
/* Returns a pointer to the default encoding (UTF-8) of the
Unicode object unicode.
+ Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
+ in the unicodeobject.
+
+ _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
+ support the previous internal function with the same behaviour.
+
Use of this API is DEPRECATED since no size information can be
extracted from the returned data.
@@ -707,7 +1108,8 @@ PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
*/
#ifndef Py_LIMITED_API
-PyAPI_FUNC(char *) _PyUnicode_AsString(PyObject *unicode);
+PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode);
+#define _PyUnicode_AsString PyUnicode_AsUTF8
#endif
/* Returns "utf-8". */
@@ -812,6 +1214,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
const char *errors /* error handling */
);
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
+ PyObject *unicode, /* Unicode object */
+ int base64SetO, /* Encode RFC2152 Set O characters in base64 */
+ int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
+ const char *errors /* error handling */
+ );
#endif
/* --- UTF-8 Codecs ------------------------------------------------------- */
@@ -834,6 +1242,10 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
);
#ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
+ PyObject *unicode,
+ const char *errors);
+
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* number of Py_UNICODE chars to encode */
@@ -915,6 +1327,11 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
const char *errors, /* error handling */
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
);
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
+ PyObject *object, /* Unicode object */
+ const char *errors, /* error handling */
+ int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+ );
#endif
/* --- UTF-16 Codecs ------------------------------------------------------ */
@@ -995,6 +1412,11 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
const char *errors, /* error handling */
int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
);
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
+ PyObject* unicode, /* Unicode object */
+ const char *errors, /* error handling */
+ int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+ );
#endif
/* --- Unicode-Escape Codecs ---------------------------------------------- */
@@ -1064,6 +1486,10 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
);
#ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
+ PyObject* unicode,
+ const char* errors);
+
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
@@ -1088,6 +1514,10 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
);
#ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
+ PyObject* unicode,
+ const char* errors);
+
PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
const Py_UNICODE *data, /* Unicode char buffer */
Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
@@ -1139,6 +1569,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
(unicode ordinal -> char ordinal) */
const char *errors /* error handling */
);
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
+ PyObject *unicode, /* Unicode object */
+ PyObject *mapping, /* character mapping
+ (unicode ordinal -> char ordinal) */
+ const char *errors /* error handling */
+ );
#endif
/* Translate a Py_UNICODE buffer of the given length by applying a
@@ -1163,7 +1599,7 @@ PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
);
#endif
-#ifdef MS_WIN32
+#ifdef HAVE_MBCS
/* --- MBCS codecs for Windows -------------------------------------------- */
@@ -1180,6 +1616,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
Py_ssize_t *consumed /* bytes consumed */
);
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
+ int code_page, /* code page number */
+ const char *string, /* encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
+ Py_ssize_t *consumed /* bytes consumed */
+ );
+
PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
PyObject *unicode /* Unicode object */
);
@@ -1187,12 +1631,18 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
+ Py_ssize_t length, /* number of Py_UNICODE chars to encode */
const char *errors /* error handling */
);
#endif
-#endif /* MS_WIN32 */
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
+ int code_page, /* code page number */
+ PyObject *unicode, /* Unicode object */
+ const char *errors /* error handling */
+ );
+
+#endif /* HAVE_MBCS */
/* --- Decimal Encoder ---------------------------------------------------- */
@@ -1240,6 +1690,49 @@ PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
);
#endif
+/* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyObject
+ as argument instead of a raw buffer and length. This function additionally
+ transforms spaces to ASCII because this is what the callers in longobject,
+ floatobject, and complexobject did anyways. */
+
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
+ PyObject *unicode /* Unicode object */
+ );
+#endif
+
+/* --- Locale encoding --------------------------------------------------- */
+
+/* Decode a string from the current locale encoding. The decoder is strict if
+ *surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape'
+ error handler (PEP 383) to escape undecodable bytes. If a byte sequence can
+ be decoded as a surrogate character and *surrogateescape* is not equal to
+ zero, the byte sequence is escaped using the 'surrogateescape' error handler
+ instead of being decoded. *str* must end with a null character but cannot
+ contain embedded null characters. */
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
+ const char *str,
+ Py_ssize_t len,
+ const char *errors);
+
+/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
+ length using strlen(). */
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
+ const char *str,
+ const char *errors);
+
+/* Encode a Unicode object to the current locale encoding. The encoder is
+ strict if *surrogateescape* is equal to zero, otherwise the
+ "surrogateescape" error handler is used. Return a bytes object. The string
+ cannot contain embedded null characters. */
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
+ PyObject *unicode,
+ const char *errors
+ );
+
/* --- File system encoding ---------------------------------------------- */
/* ParseTuple converter: encode str objects to bytes using
@@ -1292,7 +1785,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
These are capable of handling Unicode objects and strings on input
(we refer to them as strings in the descriptions) and return
- Unicode objects or integers as apporpriate. */
+ Unicode objects or integers as appropriate. */
/* Concat two strings giving a new Unicode string. */
@@ -1427,6 +1920,15 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
int direction /* Find direction: +1 forward, -1 backward */
);
+/* Like PyUnicode_Find, but search for single character only. */
+PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
+ PyObject *str,
+ Py_UCS4 ch,
+ Py_ssize_t start,
+ Py_ssize_t end,
+ int direction
+ );
+
/* Count the number of occurrences of substr in str[start:end]. */
PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
@@ -1463,7 +1965,7 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
/* Rich compare two strings and return one of the following:
- NULL in case an exception was raised
- - Py_True or Py_False for successfuly comparisons
+ - Py_True or Py_False for successful comparisons
- Py_NotImplemented in case the type combination is unknown
Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
@@ -1514,35 +2016,26 @@ PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
#ifndef Py_LIMITED_API
/* Externally visible for str.strip(unicode) */
PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
- PyUnicodeObject *self,
+ PyObject *self,
int striptype,
PyObject *sepobj
);
#endif
-/* Using the current locale, insert the thousands grouping
- into the string pointed to by buffer. For the argument descriptions,
- see Objects/stringlib/localeutil.h */
-
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
- Py_ssize_t n_buffer,
- Py_UNICODE *digits,
- Py_ssize_t n_digits,
- Py_ssize_t min_width);
-#endif
-
/* Using explicit passed-in values, insert the thousands grouping
into the string pointed to by buffer. For the argument descriptions,
see Objects/stringlib/localeutil.h */
#ifndef Py_LIMITED_API
-PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
- Py_ssize_t n_buffer,
- Py_UNICODE *digits,
- Py_ssize_t n_digits,
- Py_ssize_t min_width,
- const char *grouping,
- const char *thousands_sep);
+PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
+ PyObject *unicode,
+ Py_ssize_t index,
+ Py_ssize_t n_buffer,
+ void *digits,
+ Py_ssize_t n_digits,
+ Py_ssize_t min_width,
+ const char *grouping,
+ PyObject *thousands_sep,
+ Py_UCS4 *maxchar);
#endif
/* === Characters Type APIs =============================================== */
@@ -1598,6 +2091,34 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
Py_UCS4 ch /* Unicode character */
);
+PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
+ Py_UCS4 ch, /* Unicode character */
+ Py_UCS4 *res
+ );
+
+PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
+ Py_UCS4 ch, /* Unicode character */
+ Py_UCS4 *res
+ );
+
+PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
+ Py_UCS4 ch, /* Unicode character */
+ Py_UCS4 *res
+ );
+
+PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
+ Py_UCS4 ch, /* Unicode character */
+ Py_UCS4 *res
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsCased(
+ Py_UCS4 ch /* Unicode character */
+ );
+
PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
Py_UCS4 ch /* Unicode character */
);
@@ -1676,6 +2197,17 @@ PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
);
#endif /* Py_LIMITED_API */
+#if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
+PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
+ PyObject *op,
+ int check_content);
+#endif
+
+/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
+PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
+/* Clear all static strings. */
+PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
+
#ifdef __cplusplus
}
#endif
diff --git a/LICENSE b/LICENSE
index 43388e7..88eed1f 100644
--- a/LICENSE
+++ b/LICENSE
@@ -74,6 +74,7 @@ the various releases.
3.2.1 3.2 2011 PSF yes
3.2.2 3.2.1 2011 PSF yes
3.2.3 3.2.2 2012 PSF yes
+ 3.3.0 3.2 2012 PSF yes
Footnotes:
diff --git a/Lib/_dummy_thread.py b/Lib/_dummy_thread.py
index ed50520..13b1f26 100644
--- a/Lib/_dummy_thread.py
+++ b/Lib/_dummy_thread.py
@@ -24,11 +24,7 @@ TIMEOUT_MAX = 2**31
# imports are done when needed on a function-by-function basis. Since threads
# are disabled, the import lock should not be an issue anyway (??).
-class error(Exception):
- """Dummy implementation of _thread.error."""
-
- def __init__(self, *args):
- self.args = args
+error = RuntimeError
def start_new_thread(function, args, kwargs={}):
"""Dummy implementation of _thread.start_new_thread().
diff --git a/Lib/_osx_support.py b/Lib/_osx_support.py
new file mode 100644
index 0000000..b3aad56
--- /dev/null
+++ b/Lib/_osx_support.py
@@ -0,0 +1,488 @@
+"""Shared OS X support functions."""
+
+import os
+import re
+import sys
+
+__all__ = [
+ 'compiler_fixup',
+ 'customize_config_vars',
+ 'customize_compiler',
+ 'get_platform_osx',
+]
+
+# configuration variables that may contain universal build flags,
+# like "-arch" or "-isdkroot", that may need customization for
+# the user environment
+_UNIVERSAL_CONFIG_VARS = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS', 'BASECFLAGS',
+ 'BLDSHARED', 'LDSHARED', 'CC', 'CXX',
+ 'PY_CFLAGS', 'PY_LDFLAGS', 'PY_CPPFLAGS',
+ 'PY_CORE_CFLAGS')
+
+# configuration variables that may contain compiler calls
+_COMPILER_CONFIG_VARS = ('BLDSHARED', 'LDSHARED', 'CC', 'CXX')
+
+# prefix added to original configuration variable names
+_INITPRE = '_OSX_SUPPORT_INITIAL_'
+
+
+def _find_executable(executable, path=None):
+ """Tries to find 'executable' in the directories listed in 'path'.
+
+ A string listing directories separated by 'os.pathsep'; defaults to
+ os.environ['PATH']. Returns the complete filename or None if not found.
+ """
+ if path is None:
+ path = os.environ['PATH']
+
+ paths = path.split(os.pathsep)
+ base, ext = os.path.splitext(executable)
+
+ if (sys.platform == 'win32' or os.name == 'os2') and (ext != '.exe'):
+ executable = executable + '.exe'
+
+ if not os.path.isfile(executable):
+ for p in paths:
+ f = os.path.join(p, executable)
+ if os.path.isfile(f):
+ # the file exists, we have a shot at spawn working
+ return f
+ return None
+ else:
+ return executable
+
+
+def _read_output(commandstring):
+ """Output from succesful command execution or None"""
+ # Similar to os.popen(commandstring, "r").read(),
+ # but without actually using os.popen because that
+ # function is not usable during python bootstrap.
+ # tempfile is also not available then.
+ import contextlib
+ try:
+ import tempfile
+ fp = tempfile.NamedTemporaryFile()
+ except ImportError:
+ fp = open("/tmp/_osx_support.%s"%(
+ os.getpid(),), "w+b")
+
+ with contextlib.closing(fp) as fp:
+ cmd = "%s 2>/dev/null >'%s'" % (commandstring, fp.name)
+ return fp.read().decode('utf-8').strip() if not os.system(cmd) else None
+
+
+def _find_build_tool(toolname):
+ """Find a build tool on current path or using xcrun"""
+ return (_find_executable(toolname)
+ or _read_output("/usr/bin/xcrun -find %s" % (toolname,))
+ or ''
+ )
+
+_SYSTEM_VERSION = None
+
+def _get_system_version():
+ """Return the OS X system version as a string"""
+ # Reading this plist is a documented way to get the system
+ # version (see the documentation for the Gestalt Manager)
+ # We avoid using platform.mac_ver to avoid possible bootstrap issues during
+ # the build of Python itself (distutils is used to build standard library
+ # extensions).
+
+ global _SYSTEM_VERSION
+
+ if _SYSTEM_VERSION is None:
+ _SYSTEM_VERSION = ''
+ try:
+ f = open('/System/Library/CoreServices/SystemVersion.plist')
+ except IOError:
+ # We're on a plain darwin box, fall back to the default
+ # behaviour.
+ pass
+ else:
+ try:
+ m = re.search(r'<key>ProductUserVisibleVersion</key>\s*'
+ r'<string>(.*?)</string>', f.read())
+ finally:
+ f.close()
+ if m is not None:
+ _SYSTEM_VERSION = '.'.join(m.group(1).split('.')[:2])
+ # else: fall back to the default behaviour
+
+ return _SYSTEM_VERSION
+
+def _remove_original_values(_config_vars):
+ """Remove original unmodified values for testing"""
+ # This is needed for higher-level cross-platform tests of get_platform.
+ for k in list(_config_vars):
+ if k.startswith(_INITPRE):
+ del _config_vars[k]
+
+def _save_modified_value(_config_vars, cv, newvalue):
+ """Save modified and original unmodified value of configuration var"""
+
+ oldvalue = _config_vars.get(cv, '')
+ if (oldvalue != newvalue) and (_INITPRE + cv not in _config_vars):
+ _config_vars[_INITPRE + cv] = oldvalue
+ _config_vars[cv] = newvalue
+
+def _supports_universal_builds():
+ """Returns True if universal builds are supported on this system"""
+ # As an approximation, we assume that if we are running on 10.4 or above,
+ # then we are running with an Xcode environment that supports universal
+ # builds, in particular -isysroot and -arch arguments to the compiler. This
+ # is in support of allowing 10.4 universal builds to run on 10.3.x systems.
+
+ osx_version = _get_system_version()
+ if osx_version:
+ try:
+ osx_version = tuple(int(i) for i in osx_version.split('.'))
+ except ValueError:
+ osx_version = ''
+ return bool(osx_version >= (10, 4)) if osx_version else False
+
+
+def _find_appropriate_compiler(_config_vars):
+    """Find appropriate C compiler for extension module builds.
+
+    If the configured compiler cannot be found, or turns out to be
+    llvm-gcc, the compiler-related entries of *_config_vars* are rewritten
+    (via _save_modified_value) to use clang instead.  Nothing is changed
+    when a CC environment variable is set.
+
+    Returns the (possibly updated) *_config_vars* mapping.
+    Raises SystemError if no compiler can be located.
+    """
+
+    # Issue #13590:
+    #    The OSX location for the compiler varies between OSX
+    #    (or rather Xcode) releases.  With older releases (up-to 10.5)
+    #    the compiler is in /usr/bin, with newer releases the compiler
+    #    can only be found inside Xcode.app if the "Command Line Tools"
+    #    are not installed.
+    #
+    #    Furthermore, the compiler that can be used varies between
+    #    Xcode releases. Up to Xcode 4 it was possible to use 'gcc-4.2'
+    #    as the compiler, after that 'clang' should be used because
+    #    gcc-4.2 is either not present, or a copy of 'llvm-gcc' that
+    #    miscompiles Python.
+
+    # skip checks if the compiler was overridden with a CC env variable
+    if 'CC' in os.environ:
+        return _config_vars
+
+    # The CC config var might contain additional arguments.
+    # Ignore them while searching.
+    cc = oldcc = _config_vars['CC'].split()[0]
+    if not _find_executable(cc):
+        # Compiler is not found on the shell search PATH.
+        # Now search for clang, first on PATH (if the Command Line
+        # Tools have been installed in / or if the user has provided
+        # another location via CC).  If not found, try using xcrun
+        # to find an uninstalled clang (within a selected Xcode).
+
+        # NOTE: Cannot use subprocess here because of bootstrap
+        # issues when building Python itself (and os.popen is
+        # implemented on top of subprocess and is therefore not
+        # usable as well)
+
+        cc = _find_build_tool('clang')
+
+    elif os.path.basename(cc).startswith('gcc'):
+        # Compiler is GCC, check if it is LLVM-GCC
+        data = _read_output("'%s' --version"
+                            % (cc.replace("'", "'\"'\"'"),))
+        # _read_output() returns None when the command fails; guard the
+        # substring test so that case does not raise TypeError.
+        if data and 'llvm-gcc' in data:
+            # Found LLVM-GCC, fall back to clang
+            cc = _find_build_tool('clang')
+
+    if not cc:
+        raise SystemError(
+            "Cannot locate working compiler")
+
+    if cc != oldcc:
+        # Found a replacement compiler.
+        # Modify config vars using new compiler, if not already explicitly
+        # overridden by an env variable, preserving additional arguments.
+        for cv in _COMPILER_CONFIG_VARS:
+            if cv in _config_vars and cv not in os.environ:
+                cv_split = _config_vars[cv].split()
+                cv_split[0] = cc if cv != 'CXX' else cc + '++'
+                _save_modified_value(_config_vars, cv, ' '.join(cv_split))
+
+    return _config_vars
+
+
+def _remove_universal_flags(_config_vars):
+    """Remove all universal build arguments from config vars.
+
+    Strips '-arch ARCH' and '-isysroot PATH' from every entry named in
+    _UNIVERSAL_CONFIG_VARS that is present in *_config_vars* and not
+    overridden by an environment variable.  Returns *_config_vars*.
+    """
+
+    for cv in _UNIVERSAL_CONFIG_VARS:
+        # Do not alter a config var explicitly overridden by env var
+        if cv in _config_vars and cv not in os.environ:
+            flags = _config_vars[cv]
+            # re.ASCII must be passed as 'flags'; the fourth positional
+            # argument of re.sub() is 'count'.
+            flags = re.sub(r'-arch\s+\w+\s', ' ', flags, flags=re.ASCII)
+            flags = re.sub(r'-isysroot [^ \t]*', ' ', flags)
+            _save_modified_value(_config_vars, cv, flags)
+
+    return _config_vars
+
+
+def _remove_unsupported_archs(_config_vars):
+ """Remove any unsupported archs from config vars"""
+ # Different Xcode releases support different sets for '-arch'
+ # flags. In particular, Xcode 4.x no longer supports the
+ # PPC architectures.
+ #
+ # This code automatically removes '-arch ppc' and '-arch ppc64'
+ # when these are not supported. That makes it possible to
+ # build extensions on OSX 10.7 and later with the prebuilt
+ # 32-bit installer on the python.org website.
+
+ # skip checks if the compiler was overriden with a CC env variable
+ if 'CC' in os.environ:
+ return _config_vars
+
+ if re.search('-arch\s+ppc', _config_vars['CFLAGS']) is not None:
+ # NOTE: Cannot use subprocess here because of bootstrap
+ # issues when building Python itself
+ status = os.system("'%s' -arch ppc -x c /dev/null 2>/dev/null"%(
+ _config_vars['CC'].replace("'", "'\"'\"'"),))
+ # The Apple compiler drivers return status 255 if no PPC
+ if (status >> 8) == 255:
+ # Compiler doesn't support PPC, remove the related
+ # '-arch' flags if not explicitly overridden by an
+ # environment variable
+ for cv in _UNIVERSAL_CONFIG_VARS:
+ if cv in _config_vars and cv not in os.environ:
+ flags = _config_vars[cv]
+ flags = re.sub('-arch\s+ppc\w*\s', ' ', flags)
+ _save_modified_value(_config_vars, cv, flags)
+
+ return _config_vars
+
+
+def _override_all_archs(_config_vars):
+ """Allow override of all archs with ARCHFLAGS env var"""
+ # NOTE: This name was introduced by Apple in OSX 10.5 and
+ # is used by several scripting languages distributed with
+ # that OS release.
+ if 'ARCHFLAGS' in os.environ:
+ arch = os.environ['ARCHFLAGS']
+ for cv in _UNIVERSAL_CONFIG_VARS:
+ if cv in _config_vars and '-arch' in _config_vars[cv]:
+ flags = _config_vars[cv]
+ flags = re.sub('-arch\s+\w+\s', ' ', flags)
+ flags = flags + ' ' + arch
+ _save_modified_value(_config_vars, cv, flags)
+
+ return _config_vars
+
+
+def _check_for_unavailable_sdk(_config_vars):
+ """Remove references to any SDKs not available"""
+ # If we're on OSX 10.5 or later and the user tries to
+ # compile an extension using an SDK that is not present
+ # on the current machine it is better to not use an SDK
+ # than to fail. This is particularly important with
+ # the standalong Command Line Tools alternative to a
+ # full-blown Xcode install since the CLT packages do not
+ # provide SDKs. If the SDK is not present, it is assumed
+ # that the header files and dev libs have been installed
+ # to /usr and /System/Library by either a standalone CLT
+ # package or the CLT component within Xcode.
+ cflags = _config_vars.get('CFLAGS', '')
+ m = re.search(r'-isysroot\s+(\S+)', cflags)
+ if m is not None:
+ sdk = m.group(1)
+ if not os.path.exists(sdk):
+ for cv in _UNIVERSAL_CONFIG_VARS:
+ # Do not alter a config var explicitly overriden by env var
+ if cv in _config_vars and cv not in os.environ:
+ flags = _config_vars[cv]
+ flags = re.sub(r'-isysroot\s+\S+(?:\s|$)', ' ', flags)
+ _save_modified_value(_config_vars, cv, flags)
+
+ return _config_vars
+
+
+def compiler_fixup(compiler_so, cc_args):
+ """
+ This function will strip '-isysroot PATH' and '-arch ARCH' from the
+ compile flags if the user has specified one them in extra_compile_flags.
+
+ This is needed because '-arch ARCH' adds another architecture to the
+ build, without a way to remove an architecture. Furthermore GCC will
+ barf if multiple '-isysroot' arguments are present.
+ """
+ stripArch = stripSysroot = False
+
+ compiler_so = list(compiler_so)
+
+ if not _supports_universal_builds():
+ # OSX before 10.4.0, these don't support -arch and -isysroot at
+ # all.
+ stripArch = stripSysroot = True
+ else:
+ stripArch = '-arch' in cc_args
+ stripSysroot = '-isysroot' in cc_args
+
+ if stripArch or 'ARCHFLAGS' in os.environ:
+ while True:
+ try:
+ index = compiler_so.index('-arch')
+ # Strip this argument and the next one:
+ del compiler_so[index:index+2]
+ except ValueError:
+ break
+
+ if 'ARCHFLAGS' in os.environ and not stripArch:
+ # User specified different -arch flags in the environ,
+ # see also distutils.sysconfig
+ compiler_so = compiler_so + os.environ['ARCHFLAGS'].split()
+
+ if stripSysroot:
+ while True:
+ try:
+ index = compiler_so.index('-isysroot')
+ # Strip this argument and the next one:
+ del compiler_so[index:index+2]
+ except ValueError:
+ break
+
+ # Check if the SDK that is used during compilation actually exists,
+ # the universal build requires the usage of a universal SDK and not all
+ # users have that installed by default.
+ sysroot = None
+ if '-isysroot' in cc_args:
+ idx = cc_args.index('-isysroot')
+ sysroot = cc_args[idx+1]
+ elif '-isysroot' in compiler_so:
+ idx = compiler_so.index('-isysroot')
+ sysroot = compiler_so[idx+1]
+
+ if sysroot and not os.path.isdir(sysroot):
+ from distutils import log
+ log.warn("Compiling with an SDK that doesn't seem to exist: %s",
+ sysroot)
+ log.warn("Please check your Xcode installation")
+
+ return compiler_so
+
+
+def customize_config_vars(_config_vars):
+ """Customize Python build configuration variables.
+
+ Called internally from sysconfig with a mutable mapping
+ containing name/value pairs parsed from the configured
+ makefile used to build this interpreter. Returns
+ the mapping updated as needed to reflect the environment
+ in which the interpreter is running; in the case of
+ a Python from a binary installer, the installed
+ environment may be very different from the build
+ environment, i.e. different OS levels, different
+ built tools, different available CPU architectures.
+
+ This customization is performed whenever
+ distutils.sysconfig.get_config_vars() is first
+ called. It may be used in environments where no
+ compilers are present, i.e. when installing pure
+ Python dists. Customization of compiler paths
+ and detection of unavailable archs is deferred
+ until the first extention module build is
+ requested (in distutils.sysconfig.customize_compiler).
+
+ Currently called from distutils.sysconfig
+ """
+
+ if not _supports_universal_builds():
+ # On Mac OS X before 10.4, check if -arch and -isysroot
+ # are in CFLAGS or LDFLAGS and remove them if they are.
+ # This is needed when building extensions on a 10.3 system
+ # using a universal build of python.
+ _remove_universal_flags(_config_vars)
+
+ # Allow user to override all archs with ARCHFLAGS env var
+ _override_all_archs(_config_vars)
+
+ # Remove references to sdks that are not found
+ _check_for_unavailable_sdk(_config_vars)
+
+ return _config_vars
+
+
+def customize_compiler(_config_vars):
+ """Customize compiler path and configuration variables.
+
+ This customization is performed when the first
+ extension module build is requested
+ in distutils.sysconfig.customize_compiler).
+ """
+
+ # Find a compiler to use for extension module builds
+ _find_appropriate_compiler(_config_vars)
+
+ # Remove ppc arch flags if not supported here
+ _remove_unsupported_archs(_config_vars)
+
+ # Allow user to override all archs with ARCHFLAGS env var
+ _override_all_archs(_config_vars)
+
+ return _config_vars
+
+
+def get_platform_osx(_config_vars, osname, release, machine):
+    """Filter values for get_platform().
+
+    Returns a tuple (osname, release, machine).  When a deployment target
+    or system version is known, osname becomes "macosx", release becomes
+    that version, and machine is derived from the '-arch' flags in CFLAGS
+    or normalized from the value passed in.
+
+    Raises ValueError for an unrecognized combination of '-arch' flags.
+    """
+    # called from get_platform() in sysconfig and distutils.util
+    #
+    # For our purposes, we'll assume that the system version from
+    # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
+    # to. This makes the compatibility story a bit more sane because the
+    # machine is going to compile and link as if it were
+    # MACOSX_DEPLOYMENT_TARGET.
+
+    macver = _config_vars.get('MACOSX_DEPLOYMENT_TARGET', '')
+    macrelease = _get_system_version() or macver
+    macver = macver or macrelease
+
+    if macver:
+        release = macver
+        osname = "macosx"
+
+        # Use the original CFLAGS value, if available, so that we
+        # return the same machine type for the platform string.
+        # Otherwise, distutils may consider this a cross-compiling
+        # case and disallow installs.
+        cflags = _config_vars.get(_INITPRE+'CFLAGS',
+                                  _config_vars.get('CFLAGS', ''))
+
+        # Compare versions numerically: as strings, '10.10.' sorts
+        # before '10.4.'.
+        try:
+            release_info = tuple(int(i) for i in macrelease.split('.')[:2])
+        except ValueError:
+            # Unparsable version: assume no universal support.
+            release_info = (10, 0)
+
+        if release_info >= (10, 4) and '-arch' in cflags.strip():
+            # The universal build will build fat binaries, but not on
+            # systems before 10.4
+
+            machine = 'fat'
+
+            archs = re.findall(r'-arch\s+(\S+)', cflags)
+            archs = tuple(sorted(set(archs)))
+
+            if len(archs) == 1:
+                machine = archs[0]
+            elif archs == ('i386', 'ppc'):
+                machine = 'fat'
+            elif archs == ('i386', 'x86_64'):
+                machine = 'intel'
+            elif archs == ('i386', 'ppc', 'x86_64'):
+                machine = 'fat3'
+            elif archs == ('ppc64', 'x86_64'):
+                machine = 'fat64'
+            elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
+                machine = 'universal'
+            else:
+                raise ValueError(
+                    "Don't know machine value for archs=%r" % (archs,))
+
+        elif machine == 'i386':
+            # On OSX the machine type returned by uname is always the
+            # 32-bit variant, even if the executable architecture is
+            # the 64-bit variant
+            if sys.maxsize >= 2**32:
+                machine = 'x86_64'
+
+        elif machine in ('PowerPC', 'Power_Macintosh'):
+            # Pick a sane name for the PPC architecture.
+            # See 'i386' case
+            if sys.maxsize >= 2**32:
+                machine = 'ppc64'
+            else:
+                machine = 'ppc'
+
+    return (osname, release, machine)
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index 2d376d8..fa77ec1 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -5,7 +5,6 @@ Python implementation of the io module.
import os
import abc
import codecs
-import warnings
import errno
# Import _thread instead of threading to reduce startup cost
try:
@@ -15,7 +14,11 @@ except ImportError:
import io
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
-from errno import EINTR
+
+valid_seek_flags = {0, 1, 2} # Hardwired values
+if hasattr(os, 'SEEK_HOLE') :
+ valid_seek_flags.add(os.SEEK_HOLE)
+ valid_seek_flags.add(os.SEEK_DATA)
# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
@@ -24,20 +27,12 @@ DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.
-
-class BlockingIOError(IOError):
-
- """Exception raised when I/O would block on a non-blocking I/O stream."""
-
- def __init__(self, errno, strerror, characters_written=0):
- super().__init__(errno, strerror)
- if not isinstance(characters_written, int):
- raise TypeError("characters_written must be a integer")
- self.characters_written = characters_written
+# Rebind for compatibility
+BlockingIOError = BlockingIOError
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
- newline=None, closefd=True):
+ newline=None, closefd=True, opener=None):
r"""Open file and return a stream. Raise IOError upon failure.
@@ -47,21 +42,22 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
wrapped. (If a file descriptor is given, it is closed when the
returned I/O object is closed, unless closefd is set to False.)
- mode is an optional string that specifies the mode in which the file
- is opened. It defaults to 'r' which means open for reading in text
- mode. Other common values are 'w' for writing (truncating the file if
- it already exists), and 'a' for appending (which on some Unix systems,
- means that all writes append to the end of the file regardless of the
- current seek position). In text mode, if encoding is not specified the
- encoding used is platform dependent. (For reading and writing raw
- bytes use binary mode and leave encoding unspecified.) The available
- modes are:
+ mode is an optional string that specifies the mode in which the file is
+ opened. It defaults to 'r' which means open for reading in text mode. Other
+ common values are 'w' for writing (truncating the file if it already
+ exists), 'x' for exclusive creation of a new file, and 'a' for appending
+ (which on some Unix systems, means that all writes append to the end of the
+ file regardless of the current seek position). In text mode, if encoding is
+ not specified the encoding used is platform dependent. (For reading and
+ writing raw bytes use binary mode and leave encoding unspecified.) The
+ available modes are:
========= ===============================================================
Character Meaning
--------- ---------------------------------------------------------------
'r' open for reading (default)
'w' open for writing, truncating the file first
+ 'x' create a new file and open it for writing
'a' open for writing, appending to the end of the file if it exists
'b' binary mode
't' text mode (default)
@@ -72,7 +68,8 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
The default mode is 'rt' (open for reading text). For binary random
access, the mode 'w+b' opens and truncates the file to 0 bytes, while
- 'r+b' opens the file without truncation.
+ 'r+b' opens the file without truncation. The 'x' mode implies 'w' and
+ raises a `FileExistsError` if the file already exists.
Python distinguishes between files opened in binary and text modes,
even when the underlying operating system doesn't. Files opened in
@@ -132,6 +129,12 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
be kept open when the file is closed. This does not work when a file name is
given and must be True in that case.
+ A custom opener can be used by passing a callable as *opener*. The
+ underlying file descriptor for the file object is then obtained by calling
+ *opener* with (*file*, *flags*). *opener* must return an open file
+ descriptor (passing os.open as *opener* results in functionality similar to
+ passing None).
+
open() returns a file object whose type depends on the mode, and
through which the standard file operations such as reading and writing
are performed. When open() is used to open a file in a text mode ('w',
@@ -157,8 +160,9 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
if errors is not None and not isinstance(errors, str):
raise TypeError("invalid errors: %r" % errors)
modes = set(mode)
- if modes - set("arwb+tU") or len(mode) > len(modes):
+ if modes - set("axrwb+tU") or len(mode) > len(modes):
raise ValueError("invalid mode: %r" % mode)
+ creating = "x" in modes
reading = "r" in modes
writing = "w" in modes
appending = "a" in modes
@@ -166,14 +170,14 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
text = "t" in modes
binary = "b" in modes
if "U" in modes:
- if writing or appending:
+ if creating or writing or appending:
raise ValueError("can't use U and writing mode at once")
reading = True
if text and binary:
raise ValueError("can't have text and binary mode at once")
- if reading + writing + appending > 1:
+ if creating + reading + writing + appending > 1:
raise ValueError("can't have read/write/append mode at once")
- if not (reading or writing or appending):
+ if not (creating or reading or writing or appending):
raise ValueError("must have exactly one of read/write/append mode")
if binary and encoding is not None:
raise ValueError("binary mode doesn't take an encoding argument")
@@ -182,11 +186,12 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
if binary and newline is not None:
raise ValueError("binary mode doesn't take a newline argument")
raw = FileIO(file,
+ (creating and "x" or "") +
(reading and "r" or "") +
(writing and "w" or "") +
(appending and "a" or "") +
(updating and "+" or ""),
- closefd)
+ closefd, opener=opener)
line_buffering = False
if buffering == 1 or buffering < 0 and raw.isatty():
buffering = -1
@@ -208,7 +213,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
raise ValueError("can't have unbuffered text I/O")
if updating:
buffer = BufferedRandom(raw, buffering)
- elif writing or appending:
+ elif creating or writing or appending:
buffer = BufferedWriter(raw, buffering)
elif reading:
buffer = BufferedReader(raw, buffering)
@@ -305,6 +310,7 @@ class IOBase(metaclass=abc.ABCMeta):
* 0 -- start of stream (the default); offset should be zero or positive
* 1 -- current stream position; offset may be negative
* 2 -- end of stream; offset is usually negative
+ Some operating systems / file systems could provide additional values.
Return an int indicating the new absolute position.
"""
@@ -865,7 +871,7 @@ class BytesIO(BufferedIOBase):
elif whence == 2:
self._pos = max(0, len(self._buffer) + pos)
else:
- raise ValueError("invalid whence value")
+ raise ValueError("unsupported whence value")
return self._pos
def tell(self):
@@ -954,15 +960,19 @@ class BufferedReader(_BufferedIOMixin):
# Special case for when the number of bytes to read is unspecified.
if n is None or n == -1:
self._reset_read_buf()
+ if hasattr(self.raw, 'readall'):
+ chunk = self.raw.readall()
+ if chunk is None:
+ return buf[pos:] or None
+ else:
+ return buf[pos:] + chunk
chunks = [buf[pos:]] # Strip the consumed bytes.
current_size = 0
while True:
# Read until EOF or until read() would block.
try:
chunk = self.raw.read()
- except IOError as e:
- if e.errno != EINTR:
- raise
+ except InterruptedError:
continue
if chunk in empty_values:
nodata_val = chunk
@@ -984,9 +994,7 @@ class BufferedReader(_BufferedIOMixin):
while avail < n:
try:
chunk = self.raw.read(wanted)
- except IOError as e:
- if e.errno != EINTR:
- raise
+ except InterruptedError:
continue
if chunk in empty_values:
nodata_val = chunk
@@ -1019,9 +1027,7 @@ class BufferedReader(_BufferedIOMixin):
while True:
try:
current = self.raw.read(to_read)
- except IOError as e:
- if e.errno != EINTR:
- raise
+ except InterruptedError:
continue
break
if current:
@@ -1046,7 +1052,7 @@ class BufferedReader(_BufferedIOMixin):
return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
def seek(self, pos, whence=0):
- if not (0 <= whence <= 2):
+ if whence not in valid_seek_flags:
raise ValueError("invalid whence value")
with self._read_lock:
if whence == 1:
@@ -1064,19 +1070,13 @@ class BufferedWriter(_BufferedIOMixin):
DEFAULT_BUFFER_SIZE.
"""
- _warning_stack_offset = 2
-
- def __init__(self, raw,
- buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
+ def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
if not raw.writable():
raise IOError('"raw" argument must be writable.')
_BufferedIOMixin.__init__(self, raw)
if buffer_size <= 0:
raise ValueError("invalid buffer size")
- if max_buffer_size is not None:
- warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
- self._warning_stack_offset)
self.buffer_size = buffer_size
self._write_buf = bytearray()
self._write_lock = Lock()
@@ -1126,13 +1126,11 @@ class BufferedWriter(_BufferedIOMixin):
while self._write_buf:
try:
n = self.raw.write(self._write_buf)
+ except InterruptedError:
+ continue
except BlockingIOError:
raise RuntimeError("self.raw should implement RawIOBase: it "
"should not raise BlockingIOError")
- except IOError as e:
- if e.errno != EINTR:
- raise
- continue
if n is None:
raise BlockingIOError(
errno.EAGAIN,
@@ -1145,8 +1143,8 @@ class BufferedWriter(_BufferedIOMixin):
return _BufferedIOMixin.tell(self) + len(self._write_buf)
def seek(self, pos, whence=0):
- if not (0 <= whence <= 2):
- raise ValueError("invalid whence")
+ if whence not in valid_seek_flags:
+ raise ValueError("invalid whence value")
with self._write_lock:
self._flush_unlocked()
return _BufferedIOMixin.seek(self, pos, whence)
@@ -1168,15 +1166,11 @@ class BufferedRWPair(BufferedIOBase):
# XXX The usefulness of this (compared to having two separate IO
# objects) is questionable.
- def __init__(self, reader, writer,
- buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
+ def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
"""Constructor.
The arguments are two RawIO instances.
"""
- if max_buffer_size is not None:
- warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)
-
if not reader.readable():
raise IOError('"reader" argument must be readable.')
@@ -1233,17 +1227,14 @@ class BufferedRandom(BufferedWriter, BufferedReader):
defaults to DEFAULT_BUFFER_SIZE.
"""
- _warning_stack_offset = 3
-
- def __init__(self, raw,
- buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
+ def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
raw._checkSeekable()
BufferedReader.__init__(self, raw, buffer_size)
- BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
+ BufferedWriter.__init__(self, raw, buffer_size)
def seek(self, pos, whence=0):
- if not (0 <= whence <= 2):
- raise ValueError("invalid whence")
+ if whence not in valid_seek_flags:
+ raise ValueError("invalid whence value")
self.flush()
if self._read_buf:
# Undo read ahead.
@@ -1455,7 +1446,7 @@ class TextIOWrapper(TextIOBase):
r"""Character and line based layer over a BufferedIOBase object, buffer.
encoding gives the name of the encoding that the stream will be
- decoded or encoded with. It defaults to locale.getpreferredencoding.
+ decoded or encoded with. It defaults to locale.getpreferredencoding(False).
errors determines the strictness of encoding and decoding (see the
codecs.register) and defaults to "strict".
@@ -1476,6 +1467,9 @@ class TextIOWrapper(TextIOBase):
_CHUNK_SIZE = 2048
+ # The write_through argument has no effect here since this
+ # implementation always writes through. The argument is present only
+ # so that the signature can match the signature of the C version.
def __init__(self, buffer, encoding=None, errors=None, newline=None,
line_buffering=False, write_through=False):
if newline is not None and not isinstance(newline, str):
@@ -1494,7 +1488,7 @@ class TextIOWrapper(TextIOBase):
# Importing locale may fail if Python is being built
encoding = "ascii"
else:
- encoding = locale.getpreferredencoding()
+ encoding = locale.getpreferredencoding(False)
if not isinstance(encoding, str):
raise ValueError("invalid encoding: %r" % encoding)
@@ -1521,6 +1515,7 @@ class TextIOWrapper(TextIOBase):
self._snapshot = None # info for reconstructing decoder state
self._seekable = self._telling = self.buffer.seekable()
self._has_read1 = hasattr(self.buffer, 'read1')
+ self._b2cratio = 0.0
if self._seekable and self.writable():
position = self.buffer.tell()
@@ -1693,7 +1688,12 @@ class TextIOWrapper(TextIOBase):
else:
input_chunk = self.buffer.read(self._CHUNK_SIZE)
eof = not input_chunk
- self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
+ decoded_chars = self._decoder.decode(input_chunk, eof)
+ self._set_decoded_chars(decoded_chars)
+ if decoded_chars:
+ self._b2cratio = len(input_chunk) / len(self._decoded_chars)
+ else:
+ self._b2cratio = 0.0
if self._telling:
# At the snapshot point, len(dec_buffer) bytes before the read,
@@ -1747,20 +1747,56 @@ class TextIOWrapper(TextIOBase):
# forward until it gives us enough decoded characters.
saved_state = decoder.getstate()
try:
+ # Fast search for an acceptable start point, close to our
+ # current pos.
+ # Rationale: calling decoder.decode() has a large overhead
+ # regardless of chunk size; we want the number of such calls to
+ # be O(1) in most situations (common decoders, non-crazy input).
+ # Actually, it will be exactly 1 for fixed-size codecs (all
+ # 8-bit codecs, also UTF-16 and UTF-32).
+ skip_bytes = int(self._b2cratio * chars_to_skip)
+ skip_back = 1
+ assert skip_bytes <= len(next_input)
+ while skip_bytes > 0:
+ decoder.setstate((b'', dec_flags))
+ # Decode up to tentative start point
+ n = len(decoder.decode(next_input[:skip_bytes]))
+ if n <= chars_to_skip:
+ b, d = decoder.getstate()
+ if not b:
+ # Before pos and no bytes buffered in decoder => OK
+ dec_flags = d
+ chars_to_skip -= n
+ break
+ # Skip back by buffered amount and reset heuristic
+ skip_bytes -= len(b)
+ skip_back = 1
+ else:
+ # We're too far ahead, skip back a bit
+ skip_bytes -= skip_back
+ skip_back = skip_back * 2
+ else:
+ skip_bytes = 0
+ decoder.setstate((b'', dec_flags))
+
# Note our initial start point.
- decoder.setstate((b'', dec_flags))
- start_pos = position
- start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
- need_eof = 0
+ start_pos = position + skip_bytes
+ start_flags = dec_flags
+ if chars_to_skip == 0:
+ # We haven't moved from the start point.
+ return self._pack_cookie(start_pos, start_flags)
# Feed the decoder one byte at a time. As we go, note the
# nearest "safe start point" before the current location
# (a point where the decoder has nothing buffered, so seek()
# can safely start from there and advance to this location).
- next_byte = bytearray(1)
- for next_byte[0] in next_input:
+ bytes_fed = 0
+ need_eof = 0
+ # Chars decoded since `start_pos`
+ chars_decoded = 0
+ for i in range(skip_bytes, len(next_input)):
bytes_fed += 1
- chars_decoded += len(decoder.decode(next_byte))
+ chars_decoded += len(decoder.decode(next_input[i:i+1]))
dec_buffer, dec_flags = decoder.getstate()
if not dec_buffer and chars_decoded <= chars_to_skip:
# Decoder buffer is empty, so this is a safe start point.
@@ -1819,8 +1855,7 @@ class TextIOWrapper(TextIOBase):
self._decoder.reset()
return position
if whence != 0:
- raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
- (whence,))
+ raise ValueError("unsupported whence (%r)" % (whence,))
if cookie < 0:
raise ValueError("negative seek position %r" % (cookie,))
self.flush()
diff --git a/Lib/_strptime.py b/Lib/_strptime.py
index fa06376..b0cd3d6 100644
--- a/Lib/_strptime.py
+++ b/Lib/_strptime.py
@@ -486,19 +486,19 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
return (year, month, day,
hour, minute, second,
- weekday, julian, tz, gmtoff, tzname), fraction
+ weekday, julian, tz, tzname, gmtoff), fraction
def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"):
"""Return a time struct based on the input string and the
format string."""
tt = _strptime(data_string, format)[0]
- return time.struct_time(tt[:9])
+ return time.struct_time(tt[:time._STRUCT_TM_ITEMS])
def _strptime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"):
"""Return a class cls instance based on the input string and the
format string."""
tt, fraction = _strptime(data_string, format)
- gmtoff, tzname = tt[-2:]
+ tzname, gmtoff = tt[-2:]
args = tt[:6] + (fraction,)
if gmtoff is not None:
tzdelta = datetime_timedelta(seconds=gmtoff)
diff --git a/Lib/abc.py b/Lib/abc.py
index a6c2dc4..09778e8 100644
--- a/Lib/abc.py
+++ b/Lib/abc.py
@@ -26,7 +26,8 @@ def abstractmethod(funcobj):
class abstractclassmethod(classmethod):
- """A decorator indicating abstract classmethods.
+ """
+ A decorator indicating abstract classmethods.
Similar to abstractmethod.
@@ -36,6 +37,9 @@ class abstractclassmethod(classmethod):
@abstractclassmethod
def my_abstract_classmethod(cls, ...):
...
+
+ 'abstractclassmethod' is deprecated. Use 'classmethod' with
+ 'abstractmethod' instead.
"""
__isabstractmethod__ = True
@@ -46,7 +50,8 @@ class abstractclassmethod(classmethod):
class abstractstaticmethod(staticmethod):
- """A decorator indicating abstract staticmethods.
+ """
+ A decorator indicating abstract staticmethods.
Similar to abstractmethod.
@@ -56,6 +61,9 @@ class abstractstaticmethod(staticmethod):
@abstractstaticmethod
def my_abstract_staticmethod(...):
...
+
+ 'abstractstaticmethod' is deprecated. Use 'staticmethod' with
+ 'abstractmethod' instead.
"""
__isabstractmethod__ = True
@@ -66,7 +74,8 @@ class abstractstaticmethod(staticmethod):
class abstractproperty(property):
- """A decorator indicating abstract properties.
+ """
+ A decorator indicating abstract properties.
Requires that the metaclass is ABCMeta or derived from it. A
class that has a metaclass derived from ABCMeta cannot be
@@ -88,7 +97,11 @@ class abstractproperty(property):
def getx(self): ...
def setx(self, value): ...
x = abstractproperty(getx, setx)
+
+ 'abstractproperty' is deprecated. Use 'property' with 'abstractmethod'
+ instead.
"""
+
__isabstractmethod__ = True
@@ -133,11 +146,14 @@ class ABCMeta(type):
return cls
def register(cls, subclass):
- """Register a virtual subclass of an ABC."""
+ """Register a virtual subclass of an ABC.
+
+ Returns the subclass, to allow usage as a class decorator.
+ """
if not isinstance(subclass, type):
raise TypeError("Can only register classes")
if issubclass(subclass, cls):
- return # Already a subclass
+ return subclass # Already a subclass
# Subtle: test for cycles *after* testing for "already a subclass";
# this means we allow X.register(X) and interpret it as a no-op.
if issubclass(cls, subclass):
@@ -145,6 +161,7 @@ class ABCMeta(type):
raise RuntimeError("Refusing to create an inheritance cycle")
cls._abc_registry.add(subclass)
ABCMeta._abc_invalidation_counter += 1 # Invalidate negative cache
+ return subclass
def _dump_registry(cls, file=None):
"""Debug helper to print the ABC registry."""
diff --git a/Lib/aifc.py b/Lib/aifc.py
index 775f39c..ec4f822 100644
--- a/Lib/aifc.py
+++ b/Lib/aifc.py
@@ -136,6 +136,7 @@ writeframesraw.
import struct
import builtins
+import warnings
__all__ = ["Error", "open", "openfp"]
@@ -440,7 +441,7 @@ class Aifc_read:
kludge = 0
if chunk.chunksize == 18:
kludge = 1
- print('Warning: bad COMM chunk size')
+ warnings.warn('Warning: bad COMM chunk size')
chunk.chunksize = 23
#DEBUG end
self._comptype = chunk.read(4)
@@ -484,11 +485,10 @@ class Aifc_read:
# a position 0 and name ''
self._markers.append((id, pos, name))
except EOFError:
- print('Warning: MARK chunk contains only', end=' ')
- print(len(self._markers), end=' ')
- if len(self._markers) == 1: print('marker', end=' ')
- else: print('markers', end=' ')
- print('instead of', nmarkers)
+ w = ('Warning: MARK chunk contains only %s marker%s instead of %s' %
+ (len(self._markers), '' if len(self._markers) == 1 else 's',
+ nmarkers))
+ warnings.warn(w)
class Aifc_write:
# Variables used in this class:
diff --git a/Lib/argparse.py b/Lib/argparse.py
index eb894ca..f25b1b6 100644
--- a/Lib/argparse.py
+++ b/Lib/argparse.py
@@ -71,6 +71,7 @@ __all__ = [
'ArgumentDefaultsHelpFormatter',
'RawDescriptionHelpFormatter',
'RawTextHelpFormatter',
+ 'MetavarTypeHelpFormatter',
'Namespace',
'Action',
'ONE_OR_MORE',
@@ -419,7 +420,8 @@ class HelpFormatter(object):
# produce all arg strings
elif not action.option_strings:
- part = self._format_args(action, action.dest)
+ default = self._get_default_metavar_for_positional(action)
+ part = self._format_args(action, default)
# if it's in a group, strip the outer []
if action in group_actions:
@@ -441,7 +443,7 @@ class HelpFormatter(object):
# if the Optional takes a value, format is:
# -s ARGS or --long ARGS
else:
- default = action.dest.upper()
+ default = self._get_default_metavar_for_optional(action)
args_string = self._format_args(action, default)
part = '%s %s' % (option_string, args_string)
@@ -527,7 +529,8 @@ class HelpFormatter(object):
def _format_action_invocation(self, action):
if not action.option_strings:
- metavar, = self._metavar_formatter(action, action.dest)(1)
+ default = self._get_default_metavar_for_positional(action)
+ metavar, = self._metavar_formatter(action, default)(1)
return metavar
else:
@@ -541,7 +544,7 @@ class HelpFormatter(object):
# if the Optional takes a value, format is:
# -s ARGS, --long ARGS
else:
- default = action.dest.upper()
+ default = self._get_default_metavar_for_optional(action)
args_string = self._format_args(action, default)
for option_string in action.option_strings:
parts.append('%s %s' % (option_string, args_string))
@@ -619,6 +622,12 @@ class HelpFormatter(object):
def _get_help_string(self, action):
return action.help
+ def _get_default_metavar_for_optional(self, action):
+ return action.dest.upper()
+
+ def _get_default_metavar_for_positional(self, action):
+ return action.dest
+
class RawDescriptionHelpFormatter(HelpFormatter):
"""Help message formatter which retains any formatting in descriptions.
@@ -628,7 +637,7 @@ class RawDescriptionHelpFormatter(HelpFormatter):
"""
def _fill_text(self, text, width, indent):
- return ''.join([indent + line for line in text.splitlines(True)])
+ return ''.join(indent + line for line in text.splitlines(keepends=True))
class RawTextHelpFormatter(RawDescriptionHelpFormatter):
@@ -659,6 +668,22 @@ class ArgumentDefaultsHelpFormatter(HelpFormatter):
return help
+class MetavarTypeHelpFormatter(HelpFormatter):
+ """Help message formatter which uses the argument 'type' as the default
+ metavar value (instead of the argument 'dest')
+
+ Only the name of this class is considered a public API. All the methods
+ provided by the class are considered an implementation detail.
+ """
+
+ def _get_default_metavar_for_optional(self, action):
+ return action.type.__name__
+
+ def _get_default_metavar_for_positional(self, action):
+ return action.type.__name__
+
+
+
# =====================
# Options and Arguments
# =====================
@@ -1554,7 +1579,6 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
usage=None,
description=None,
epilog=None,
- version=None,
parents=[],
formatter_class=HelpFormatter,
prefix_chars='-',
@@ -1563,14 +1587,6 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
conflict_handler='error',
add_help=True):
- if version is not None:
- import warnings
- warnings.warn(
- """The "version" argument to ArgumentParser is deprecated. """
- """Please use """
- """"add_argument(..., action='version', version="N", ...)" """
- """instead""", DeprecationWarning)
-
superinit = super(ArgumentParser, self).__init__
superinit(description=description,
prefix_chars=prefix_chars,
@@ -1584,7 +1600,6 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
self.prog = prog
self.usage = usage
self.epilog = epilog
- self.version = version
self.formatter_class = formatter_class
self.fromfile_prefix_chars = fromfile_prefix_chars
self.add_help = add_help
@@ -1599,7 +1614,7 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
return string
self.register('type', None, identity)
- # add help and version arguments if necessary
+ # add help argument if necessary
# (using explicit default to override global argument_default)
default_prefix = '-' if '-' in prefix_chars else prefix_chars[0]
if self.add_help:
@@ -1607,12 +1622,6 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
default_prefix+'h', default_prefix*2+'help',
action='help', default=SUPPRESS,
help=_('show this help message and exit'))
- if self.version:
- self.add_argument(
- default_prefix+'v', default_prefix*2+'version',
- action='version', default=SUPPRESS,
- version=self.version,
- help=_("show program's version number and exit"))
# add parent arguments and defaults
for parent in parents:
@@ -1632,7 +1641,6 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
'prog',
'usage',
'description',
- 'version',
'formatter_class',
'conflict_handler',
'add_help',
@@ -1940,29 +1948,29 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
# if we didn't consume all the argument strings, there were extras
extras.extend(arg_strings[stop_index:])
- # if we didn't use all the Positional objects, there were too few
- # arg strings supplied.
- if positionals:
- self.error(_('too few arguments'))
-
- # make sure all required actions were present, and convert defaults.
+ # make sure all required actions were present and also convert
+ # action defaults which were not given as arguments
+ required_actions = []
for action in self._actions:
if action not in seen_actions:
if action.required:
- name = _get_action_name(action)
- self.error(_('argument %s is required') % name)
+ required_actions.append(_get_action_name(action))
else:
# Convert action default now instead of doing it before
# parsing arguments to avoid calling convert functions
# twice (which may fail) if the argument was given, but
# only if it was defined already in the namespace
if (action.default is not None and
- isinstance(action.default, str) and
- hasattr(namespace, action.dest) and
- action.default is getattr(namespace, action.dest)):
+ isinstance(action.default, str) and
+ hasattr(namespace, action.dest) and
+ action.default is getattr(namespace, action.dest)):
setattr(namespace, action.dest,
self._get_value(action, action.default))
+ if required_actions:
+ self.error(_('the following arguments are required: %s') %
+ ', '.join(required_actions))
+
# make sure all required groups had one option present
for group in self._mutually_exclusive_groups:
if group.required:
@@ -2314,16 +2322,6 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
# determine help from format above
return formatter.format_help()
- def format_version(self):
- import warnings
- warnings.warn(
- 'The format_version method is deprecated -- the "version" '
- 'argument to ArgumentParser is no longer supported.',
- DeprecationWarning)
- formatter = self._get_formatter()
- formatter.add_text(self.version)
- return formatter.format_help()
-
def _get_formatter(self):
return self.formatter_class(prog=self.prog)
@@ -2340,14 +2338,6 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer):
file = _sys.stdout
self._print_message(self.format_help(), file)
- def print_version(self, file=None):
- import warnings
- warnings.warn(
- 'The print_version method is deprecated -- the "version" '
- 'argument to ArgumentParser is no longer supported.',
- DeprecationWarning)
- self._print_message(self.format_version(), file)
-
def _print_message(self, message, file=None):
if message:
if file is None:
diff --git a/Lib/ast.py b/Lib/ast.py
index fb5adac..13f59f9 100644
--- a/Lib/ast.py
+++ b/Lib/ast.py
@@ -25,7 +25,6 @@
:license: Python License.
"""
from _ast import *
-from _ast import __version__
def parse(source, filename='<unknown>', mode='exec'):
diff --git a/Lib/asynchat.py b/Lib/asynchat.py
index 6558512..4e26bb5 100644
--- a/Lib/asynchat.py
+++ b/Lib/asynchat.py
@@ -49,18 +49,6 @@ import socket
import asyncore
from collections import deque
-def buffer(obj, start=None, stop=None):
- # if memoryview objects gain slicing semantics,
- # this function will change for the better
- # memoryview used for the TypeError
- memoryview(obj)
- if start == None:
- start = 0
- if stop == None:
- stop = len(obj)
- x = obj[start:stop]
- ## print("buffer type is: %s"%(type(x),))
- return x
class async_chat (asyncore.dispatcher):
"""This is an abstract class. You must derive from this class, and add
@@ -75,7 +63,7 @@ class async_chat (asyncore.dispatcher):
# sign of an application bug that we don't want to pass silently
use_encoding = 0
- encoding = 'latin1'
+ encoding = 'latin-1'
def __init__ (self, sock=None, map=None):
# for string terminator matching
@@ -240,7 +228,7 @@ class async_chat (asyncore.dispatcher):
# handle classic producer behavior
obs = self.ac_out_buffer_size
try:
- data = buffer(first, 0, obs)
+ data = first[:obs]
except TypeError:
data = first.more()
if data:
diff --git a/Lib/asyncore.py b/Lib/asyncore.py
index b06077f..2cac88b 100644
--- a/Lib/asyncore.py
+++ b/Lib/asyncore.py
@@ -54,7 +54,7 @@ import warnings
import os
from errno import EALREADY, EINPROGRESS, EWOULDBLOCK, ECONNRESET, EINVAL, \
- ENOTCONN, ESHUTDOWN, EINTR, EISCONN, EBADF, ECONNABORTED, EPIPE, EAGAIN, \
+ ENOTCONN, ESHUTDOWN, EISCONN, EBADF, ECONNABORTED, EPIPE, EAGAIN, \
errorcode
_DISCONNECTED = frozenset((ECONNRESET, ENOTCONN, ESHUTDOWN, ECONNABORTED, EPIPE,
@@ -143,11 +143,8 @@ def poll(timeout=0.0, map=None):
try:
r, w, e = select.select(r, w, e, timeout)
- except select.error as err:
- if err.args[0] != EINTR:
- raise
- else:
- return
+ except InterruptedError:
+ return
for fd in r:
obj = map.get(fd)
@@ -184,15 +181,10 @@ def poll2(timeout=0.0, map=None):
if obj.writable() and not obj.accepting:
flags |= select.POLLOUT
if flags:
- # Only check for exceptions if object was either readable
- # or writable.
- flags |= select.POLLERR | select.POLLHUP | select.POLLNVAL
pollster.register(fd, flags)
try:
r = pollster.poll(timeout)
- except select.error as err:
- if err.args[0] != EINTR:
- raise
+ except InterruptedError:
r = []
for fd, flags in r:
obj = map.get(fd)
@@ -292,7 +284,7 @@ class dispatcher:
del map[fd]
self._fileno = None
- def create_socket(self, family, type):
+ def create_socket(self, family=socket.AF_INET, type=socket.SOCK_STREAM):
self.family_and_type = family, type
sock = socket.socket(family, type)
sock.setblocking(0)
diff --git a/Lib/base64.py b/Lib/base64.py
index 895d813..4042f00 100755
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -29,14 +29,16 @@ __all__ = [
bytes_types = (bytes, bytearray) # Types acceptable as binary data
-
-def _translate(s, altchars):
- if not isinstance(s, bytes_types):
- raise TypeError("expected bytes, not %s" % s.__class__.__name__)
- translation = bytearray(range(256))
- for k, v in altchars.items():
- translation[ord(k)] = v[0]
- return s.translate(translation)
+def _bytes_from_decode_data(s):
+ if isinstance(s, str):
+ try:
+ return s.encode('ascii')
+ except UnicodeEncodeError:
+ raise ValueError('string argument should contain only ASCII characters')
+ elif isinstance(s, bytes_types):
+ return s
+ else:
+ raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__)
@@ -61,7 +63,7 @@ def b64encode(s, altchars=None):
raise TypeError("expected bytes, not %s"
% altchars.__class__.__name__)
assert len(altchars) == 2, repr(altchars)
- return _translate(encoded, {'+': altchars[0:1], '/': altchars[1:2]})
+ return encoded.translate(bytes.maketrans(b'+/', altchars))
return encoded
@@ -79,14 +81,11 @@ def b64decode(s, altchars=None, validate=False):
discarded prior to the padding check. If validate is True,
non-base64-alphabet characters in the input result in a binascii.Error.
"""
- if not isinstance(s, bytes_types):
- raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+ s = _bytes_from_decode_data(s)
if altchars is not None:
- if not isinstance(altchars, bytes_types):
- raise TypeError("expected bytes, not %s"
- % altchars.__class__.__name__)
+ altchars = _bytes_from_decode_data(altchars)
assert len(altchars) == 2, repr(altchars)
- s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'})
+ s = s.translate(bytes.maketrans(altchars, b'+/'))
if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
raise binascii.Error('Non-base64 digit found')
return binascii.a2b_base64(s)
@@ -109,6 +108,10 @@ def standard_b64decode(s):
"""
return b64decode(s)
+
+_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
+_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
+
def urlsafe_b64encode(s):
"""Encode a byte string using a url-safe Base64 alphabet.
@@ -116,7 +119,7 @@ def urlsafe_b64encode(s):
returned. The alphabet uses '-' instead of '+' and '_' instead of
'/'.
"""
- return b64encode(s, b'-_')
+ return b64encode(s).translate(_urlsafe_encode_translation)
def urlsafe_b64decode(s):
"""Decode a byte string encoded with the standard Base64 alphabet.
@@ -128,7 +131,9 @@ def urlsafe_b64decode(s):
The alphabet uses '-' instead of '+' and '_' instead of '/'.
"""
- return b64decode(s, b'-_')
+ s = _bytes_from_decode_data(s)
+ s = s.translate(_urlsafe_decode_translation)
+ return b64decode(s)
@@ -211,8 +216,7 @@ def b32decode(s, casefold=False, map01=None):
the input is incorrectly padded or if there are non-alphabet
characters present in the input.
"""
- if not isinstance(s, bytes_types):
- raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+ s = _bytes_from_decode_data(s)
quanta, leftover = divmod(len(s), 8)
if leftover:
raise binascii.Error('Incorrect padding')
@@ -220,10 +224,9 @@ def b32decode(s, casefold=False, map01=None):
# False, or the character to map the digit 1 (one) to. It should be
# either L (el) or I (eye).
if map01 is not None:
- if not isinstance(map01, bytes_types):
- raise TypeError("expected bytes, not %s" % map01.__class__.__name__)
+ map01 = _bytes_from_decode_data(map01)
assert len(map01) == 1, repr(map01)
- s = _translate(s, {b'0': b'O', b'1': map01})
+ s = s.translate(bytes.maketrans(b'01', b'O' + map01))
if casefold:
s = s.upper()
# Strip off pad characters from the right. We need to count the pad
@@ -292,8 +295,7 @@ def b16decode(s, casefold=False):
s were incorrectly padded or if there are non-alphabet characters
present in the string.
"""
- if not isinstance(s, bytes_types):
- raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+ s = _bytes_from_decode_data(s)
if casefold:
s = s.upper()
if re.search(b'[^0-9A-F]', s):
diff --git a/Lib/binhex.py b/Lib/binhex.py
index 999a675..7bf9278 100644
--- a/Lib/binhex.py
+++ b/Lib/binhex.py
@@ -23,7 +23,6 @@ hexbin(inputfilename, outputfilename)
#
import io
import os
-import sys
import struct
import binascii
diff --git a/Lib/bz2.py b/Lib/bz2.py
new file mode 100644
index 0000000..c307507
--- /dev/null
+++ b/Lib/bz2.py
@@ -0,0 +1,504 @@
+"""Interface to the libbzip2 compression library.
+
+This module provides a file interface, classes for incremental
+(de)compression, and functions for one-shot (de)compression.
+"""
+
+__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
+ "open", "compress", "decompress"]
+
+__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"
+
+import builtins
+import io
+import warnings
+
+try:
+ from threading import RLock
+except ImportError:
+ from dummy_threading import RLock
+
+from _bz2 import BZ2Compressor, BZ2Decompressor
+
+
+_MODE_CLOSED = 0
+_MODE_READ = 1
+_MODE_READ_EOF = 2
+_MODE_WRITE = 3
+
+_BUFFER_SIZE = 8192
+
+
+class BZ2File(io.BufferedIOBase):
+
+ """A file object providing transparent bzip2 (de)compression.
+
+ A BZ2File can act as a wrapper for an existing file object, or refer
+ directly to a named file on disk.
+
+ Note that BZ2File provides a *binary* file interface - data read is
+ returned as bytes, and data to be written should be given as bytes.
+ """
+
+ def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
+ """Open a bzip2-compressed file.
+
+ If filename is a str or bytes object, is gives the name of the file to
+ be opened. Otherwise, it should be a file object, which will be used to
+ read or write the compressed data.
+
+ mode can be 'r' for reading (default), 'w' for (over)writing, or 'a' for
+ appending. These can equivalently be given as 'rb', 'wb', and 'ab'.
+
+ buffering is ignored. Its use is deprecated.
+
+ If mode is 'w' or 'a', compresslevel can be a number between 1
+ and 9 specifying the level of compression: 1 produces the least
+ compression, and 9 (default) produces the most compression.
+
+ If mode is 'r', the input file may be the concatenation of
+ multiple compressed streams.
+ """
+ # This lock must be recursive, so that BufferedIOBase's
+ # readline(), readlines() and writelines() don't deadlock.
+ self._lock = RLock()
+ self._fp = None
+ self._closefp = False
+ self._mode = _MODE_CLOSED
+ self._pos = 0
+ self._size = -1
+
+ if buffering is not None:
+ warnings.warn("Use of 'buffering' argument is deprecated",
+ DeprecationWarning)
+
+ if not (1 <= compresslevel <= 9):
+ raise ValueError("compresslevel must be between 1 and 9")
+
+ if mode in ("", "r", "rb"):
+ mode = "rb"
+ mode_code = _MODE_READ
+ self._decompressor = BZ2Decompressor()
+ self._buffer = b""
+ self._buffer_offset = 0
+ elif mode in ("w", "wb"):
+ mode = "wb"
+ mode_code = _MODE_WRITE
+ self._compressor = BZ2Compressor(compresslevel)
+ elif mode in ("a", "ab"):
+ mode = "ab"
+ mode_code = _MODE_WRITE
+ self._compressor = BZ2Compressor(compresslevel)
+ else:
+ raise ValueError("Invalid mode: {!r}".format(mode))
+
+ if isinstance(filename, (str, bytes)):
+ self._fp = builtins.open(filename, mode)
+ self._closefp = True
+ self._mode = mode_code
+ elif hasattr(filename, "read") or hasattr(filename, "write"):
+ self._fp = filename
+ self._mode = mode_code
+ else:
+ raise TypeError("filename must be a str or bytes object, or a file")
+
+ def close(self):
+ """Flush and close the file.
+
+ May be called more than once without error. Once the file is
+ closed, any other operation on it will raise a ValueError.
+ """
+ with self._lock:
+ if self._mode == _MODE_CLOSED:
+ return
+ try:
+ if self._mode in (_MODE_READ, _MODE_READ_EOF):
+ self._decompressor = None
+ elif self._mode == _MODE_WRITE:
+ self._fp.write(self._compressor.flush())
+ self._compressor = None
+ finally:
+ try:
+ if self._closefp:
+ self._fp.close()
+ finally:
+ self._fp = None
+ self._closefp = False
+ self._mode = _MODE_CLOSED
+ self._buffer = b""
+ self._buffer_offset = 0
+
+ @property
+ def closed(self):
+ """True if this file is closed."""
+ return self._mode == _MODE_CLOSED
+
+ def fileno(self):
+ """Return the file descriptor for the underlying file."""
+ self._check_not_closed()
+ return self._fp.fileno()
+
+ def seekable(self):
+ """Return whether the file supports seeking."""
+ return self.readable() and self._fp.seekable()
+
+ def readable(self):
+ """Return whether the file was opened for reading."""
+ self._check_not_closed()
+ return self._mode in (_MODE_READ, _MODE_READ_EOF)
+
+ def writable(self):
+ """Return whether the file was opened for writing."""
+ self._check_not_closed()
+ return self._mode == _MODE_WRITE
+
+ # Mode-checking helper functions.
+
+ def _check_not_closed(self):
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+
+ def _check_can_read(self):
+ if self._mode not in (_MODE_READ, _MODE_READ_EOF):
+ self._check_not_closed()
+ raise io.UnsupportedOperation("File not open for reading")
+
+ def _check_can_write(self):
+ if self._mode != _MODE_WRITE:
+ self._check_not_closed()
+ raise io.UnsupportedOperation("File not open for writing")
+
+ def _check_can_seek(self):
+ if self._mode not in (_MODE_READ, _MODE_READ_EOF):
+ self._check_not_closed()
+ raise io.UnsupportedOperation("Seeking is only supported "
+ "on files open for reading")
+ if not self._fp.seekable():
+ raise io.UnsupportedOperation("The underlying file object "
+ "does not support seeking")
+
+ # Fill the readahead buffer if it is empty. Returns False on EOF.
+ def _fill_buffer(self):
+ if self._mode == _MODE_READ_EOF:
+ return False
+ # Depending on the input data, our call to the decompressor may not
+ # return any data. In this case, try again after reading another block.
+ while self._buffer_offset == len(self._buffer):
+ rawblock = (self._decompressor.unused_data or
+ self._fp.read(_BUFFER_SIZE))
+
+ if not rawblock:
+ if self._decompressor.eof:
+ self._mode = _MODE_READ_EOF
+ self._size = self._pos
+ return False
+ else:
+ raise EOFError("Compressed file ended before the "
+ "end-of-stream marker was reached")
+
+ # Continue to next stream.
+ if self._decompressor.eof:
+ self._decompressor = BZ2Decompressor()
+
+ self._buffer = self._decompressor.decompress(rawblock)
+ self._buffer_offset = 0
+ return True
+
+ # Read data until EOF.
+ # If return_data is false, consume the data without returning it.
+ def _read_all(self, return_data=True):
+ # The loop assumes that _buffer_offset is 0. Ensure that this is true.
+ self._buffer = self._buffer[self._buffer_offset:]
+ self._buffer_offset = 0
+
+ blocks = []
+ while self._fill_buffer():
+ if return_data:
+ blocks.append(self._buffer)
+ self._pos += len(self._buffer)
+ self._buffer = b""
+ if return_data:
+ return b"".join(blocks)
+
+ # Read a block of up to n bytes.
+ # If return_data is false, consume the data without returning it.
+ def _read_block(self, n, return_data=True):
+ # If we have enough data buffered, return immediately.
+ end = self._buffer_offset + n
+ if end <= len(self._buffer):
+ data = self._buffer[self._buffer_offset : end]
+ self._buffer_offset = end
+ self._pos += len(data)
+ return data if return_data else None
+
+ # The loop assumes that _buffer_offset is 0. Ensure that this is true.
+ self._buffer = self._buffer[self._buffer_offset:]
+ self._buffer_offset = 0
+
+ blocks = []
+ while n > 0 and self._fill_buffer():
+ if n < len(self._buffer):
+ data = self._buffer[:n]
+ self._buffer_offset = n
+ else:
+ data = self._buffer
+ self._buffer = b""
+ if return_data:
+ blocks.append(data)
+ self._pos += len(data)
+ n -= len(data)
+ if return_data:
+ return b"".join(blocks)
+
+ def peek(self, n=0):
+ """Return buffered data without advancing the file position.
+
+ Always returns at least one byte of data, unless at EOF.
+ The exact number of bytes returned is unspecified.
+ """
+ with self._lock:
+ self._check_can_read()
+ if not self._fill_buffer():
+ return b""
+ return self._buffer[self._buffer_offset:]
+
+ def read(self, size=-1):
+ """Read up to size uncompressed bytes from the file.
+
+ If size is negative or omitted, read until EOF is reached.
+ Returns b'' if the file is already at EOF.
+ """
+ with self._lock:
+ self._check_can_read()
+ if size == 0:
+ return b""
+ elif size < 0:
+ return self._read_all()
+ else:
+ return self._read_block(size)
+
+ def read1(self, size=-1):
+ """Read up to size uncompressed bytes, while trying to avoid
+ making multiple reads from the underlying stream.
+
+ Returns b'' if the file is at EOF.
+ """
+ # Usually, read1() calls _fp.read() at most once. However, sometimes
+ # this does not give enough data for the decompressor to make progress.
+ # In this case we make multiple reads, to avoid returning b"".
+ with self._lock:
+ self._check_can_read()
+ if (size == 0 or
+ # Only call _fill_buffer() if the buffer is actually empty.
+ # This gives a significant speedup if *size* is small.
+ (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
+ return b""
+ if size > 0:
+ data = self._buffer[self._buffer_offset :
+ self._buffer_offset + size]
+ self._buffer_offset += len(data)
+ else:
+ data = self._buffer[self._buffer_offset:]
+ self._buffer = b""
+ self._buffer_offset = 0
+ self._pos += len(data)
+ return data
+
+ def readinto(self, b):
+ """Read up to len(b) bytes into b.
+
+ Returns the number of bytes read (0 for EOF).
+ """
+ with self._lock:
+ return io.BufferedIOBase.readinto(self, b)
+
+ def readline(self, size=-1):
+ """Read a line of uncompressed bytes from the file.
+
+ The terminating newline (if present) is retained. If size is
+ non-negative, no more than size bytes will be read (in which
+ case the line may be incomplete). Returns b'' if already at EOF.
+ """
+ if not isinstance(size, int):
+ if not hasattr(size, "__index__"):
+ raise TypeError("Integer argument expected")
+ size = size.__index__()
+ with self._lock:
+ self._check_can_read()
+ # Shortcut for the common case - the whole line is in the buffer.
+ if size < 0:
+ end = self._buffer.find(b"\n", self._buffer_offset) + 1
+ if end > 0:
+ line = self._buffer[self._buffer_offset : end]
+ self._buffer_offset = end
+ self._pos += len(line)
+ return line
+ return io.BufferedIOBase.readline(self, size)
+
+ def readlines(self, size=-1):
+ """Read a list of lines of uncompressed bytes from the file.
+
+ size can be specified to control the number of lines read: no
+ further lines will be read once the total size of the lines read
+ so far equals or exceeds size.
+ """
+ if not isinstance(size, int):
+ if not hasattr(size, "__index__"):
+ raise TypeError("Integer argument expected")
+ size = size.__index__()
+ with self._lock:
+ return io.BufferedIOBase.readlines(self, size)
+
+ def write(self, data):
+ """Write a byte string to the file.
+
+ Returns the number of uncompressed bytes written, which is
+ always len(data). Note that due to buffering, the file on disk
+ may not reflect the data written until close() is called.
+ """
+ with self._lock:
+ self._check_can_write()
+ compressed = self._compressor.compress(data)
+ self._fp.write(compressed)
+ self._pos += len(data)
+ return len(data)
+
+ def writelines(self, seq):
+ """Write a sequence of byte strings to the file.
+
+ Returns the number of uncompressed bytes written.
+ seq can be any iterable yielding byte strings.
+
+ Line separators are not added between the written byte strings.
+ """
+ with self._lock:
+ return io.BufferedIOBase.writelines(self, seq)
+
+ # Rewind the file to the beginning of the data stream.
+ def _rewind(self):
+ self._fp.seek(0, 0)
+ self._mode = _MODE_READ
+ self._pos = 0
+ self._decompressor = BZ2Decompressor()
+ self._buffer = b""
+ self._buffer_offset = 0
+
+ def seek(self, offset, whence=0):
+ """Change the file position.
+
+ The new position is specified by offset, relative to the
+ position indicated by whence. Values for whence are:
+
+ 0: start of stream (default); offset must not be negative
+ 1: current stream position
+ 2: end of stream; offset must not be positive
+
+ Returns the new file position.
+
+ Note that seeking is emulated, so depending on the parameters,
+ this operation may be extremely slow.
+ """
+ with self._lock:
+ self._check_can_seek()
+
+ # Recalculate offset as an absolute file position.
+ if whence == 0:
+ pass
+ elif whence == 1:
+ offset = self._pos + offset
+ elif whence == 2:
+ # Seeking relative to EOF - we need to know the file's size.
+ if self._size < 0:
+ self._read_all(return_data=False)
+ offset = self._size + offset
+ else:
+ raise ValueError("Invalid value for whence: {}".format(whence))
+
+ # Make it so that offset is the number of bytes to skip forward.
+ if offset < self._pos:
+ self._rewind()
+ else:
+ offset -= self._pos
+
+ # Read and discard data until we reach the desired position.
+ self._read_block(offset, return_data=False)
+
+ return self._pos
+
+ def tell(self):
+ """Return the current file position."""
+ with self._lock:
+ self._check_not_closed()
+ return self._pos
+
+
+def open(filename, mode="rb", compresslevel=9,
+ encoding=None, errors=None, newline=None):
+ """Open a bzip2-compressed file in binary or text mode.
+
+ The filename argument can be an actual filename (a str or bytes object), or
+ an existing file object to read from or write to.
+
+ The mode argument can be "r", "rb", "w", "wb", "a" or "ab" for binary mode,
+ or "rt", "wt" or "at" for text mode. The default mode is "rb", and the
+ default compresslevel is 9.
+
+ For binary mode, this function is equivalent to the BZ2File constructor:
+ BZ2File(filename, mode, compresslevel). In this case, the encoding, errors
+ and newline arguments must not be provided.
+
+ For text mode, a BZ2File object is created, and wrapped in an
+ io.TextIOWrapper instance with the specified encoding, error handling
+ behavior, and line ending(s).
+
+ """
+ if "t" in mode:
+ if "b" in mode:
+ raise ValueError("Invalid mode: %r" % (mode,))
+ else:
+ if encoding is not None:
+ raise ValueError("Argument 'encoding' not supported in binary mode")
+ if errors is not None:
+ raise ValueError("Argument 'errors' not supported in binary mode")
+ if newline is not None:
+ raise ValueError("Argument 'newline' not supported in binary mode")
+
+ bz_mode = mode.replace("t", "")
+ binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)
+
+ if "t" in mode:
+ return io.TextIOWrapper(binary_file, encoding, errors, newline)
+ else:
+ return binary_file
+
+
+def compress(data, compresslevel=9):
+ """Compress a block of data.
+
+ compresslevel, if given, must be a number between 1 and 9.
+
+ For incremental compression, use a BZ2Compressor object instead.
+ """
+ comp = BZ2Compressor(compresslevel)
+ return comp.compress(data) + comp.flush()
+
+
+def decompress(data):
+ """Decompress a block of data.
+
+ For incremental decompression, use a BZ2Decompressor object instead.
+ """
+ if len(data) == 0:
+ return b""
+
+ results = []
+ while True:
+ decomp = BZ2Decompressor()
+ results.append(decomp.decompress(data))
+ if not decomp.eof:
+ raise ValueError("Compressed data ended before the "
+ "end-of-stream marker was reached")
+ if not decomp.unused_data:
+ return b"".join(results)
+ # There is unused data left over. Proceed to next stream.
+ data = decomp.unused_data
diff --git a/Lib/cgi.py b/Lib/cgi.py
index 90a3345..e964f0c 100755
--- a/Lib/cgi.py
+++ b/Lib/cgi.py
@@ -76,7 +76,7 @@ def initlog(*allargs):
send an error message).
"""
- global logfp, log
+ global log, logfile, logfp
if logfile and not logfp:
try:
logfp = open(logfile, "a")
@@ -96,6 +96,15 @@ def nolog(*allargs):
"""Dummy function, assigned to log when logging is disabled."""
pass
+def closelog():
+ """Close the log file."""
+ global log, logfile, logfp
+ logfile = ''
+ if logfp:
+ logfp.close()
+ logfp = None
+ log = initlog
+
log = initlog # The current logging function
@@ -1003,7 +1012,7 @@ environment as well. Here are some common variable names:
def escape(s, quote=None):
"""Deprecated API."""
warn("cgi.escape is deprecated, use html.escape instead",
- PendingDeprecationWarning, stacklevel=2)
+ DeprecationWarning, stacklevel=2)
s = s.replace("&", "&amp;") # Must be done first!
s = s.replace("<", "&lt;")
s = s.replace(">", "&gt;")
diff --git a/Lib/cgitb.py b/Lib/cgitb.py
index 7b52c8e..e3ce2cb 100644
--- a/Lib/cgitb.py
+++ b/Lib/cgitb.py
@@ -31,7 +31,6 @@ import tempfile
import time
import tokenize
import traceback
-import types
def reset():
"""Return a string that resets the CGI and browser to a known state."""
diff --git a/Lib/code.py b/Lib/code.py
index 605aede..9020aab 100644
--- a/Lib/code.py
+++ b/Lib/code.py
@@ -105,9 +105,10 @@ class InteractiveInterpreter:
The output is written by self.write(), below.
"""
- type, value, sys.last_traceback = sys.exc_info()
+ type, value, tb = sys.exc_info()
sys.last_type = type
sys.last_value = value
+ sys.last_traceback = tb
if filename and type is SyntaxError:
# Work hard to stuff the correct filename in the exception
try:
@@ -119,8 +120,13 @@ class InteractiveInterpreter:
# Stuff in the right filename
value = SyntaxError(msg, (filename, lineno, offset, line))
sys.last_value = value
- lines = traceback.format_exception_only(type, value)
- self.write(''.join(lines))
+ if sys.excepthook is sys.__excepthook__:
+ lines = traceback.format_exception_only(type, value)
+ self.write(''.join(lines))
+ else:
+ # If someone has set sys.excepthook, we let that take precedence
+ # over self.write
+ sys.excepthook(type, value, tb)
def showtraceback(self):
"""Display the exception that just occurred.
@@ -143,7 +149,12 @@ class InteractiveInterpreter:
lines.extend(traceback.format_exception_only(type, value))
finally:
tblist = tb = None
- self.write(''.join(lines))
+ if sys.excepthook is sys.__excepthook__:
+ self.write(''.join(lines))
+ else:
+ # If someone has set sys.excepthook, we let that take precedence
+ # over self.write
+ sys.excepthook(type, value, tb)
def write(self, data):
"""Write a string.
diff --git a/Lib/codecs.py b/Lib/codecs.py
index b150d64..48d4c9c 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -461,7 +461,7 @@ class StreamReader(Codec):
# read until we get the required number of characters (if available)
while True:
- # can the request can be satisfied from the character buffer?
+ # can the request be satisfied from the character buffer?
if chars < 0:
if size < 0:
if self.charbuffer:
@@ -484,7 +484,7 @@ class StreamReader(Codec):
if firstline:
newchars, decodedbytes = \
self.decode(data[:exc.start], self.errors)
- lines = newchars.splitlines(True)
+ lines = newchars.splitlines(keepends=True)
if len(lines)<=1:
raise
else:
@@ -526,7 +526,7 @@ class StreamReader(Codec):
self.charbuffer = self.linebuffer[0]
self.linebuffer = None
if not keepends:
- line = line.splitlines(False)[0]
+ line = line.splitlines(keepends=False)[0]
return line
readsize = size or 72
@@ -543,7 +543,7 @@ class StreamReader(Codec):
data += self.read(size=1, chars=1)
line += data
- lines = line.splitlines(True)
+ lines = line.splitlines(keepends=True)
if lines:
if len(lines) > 1:
# More than one line result; the first line is a full line
@@ -559,10 +559,10 @@ class StreamReader(Codec):
# only one remaining line, put it back into charbuffer
self.charbuffer = lines[0] + self.charbuffer
if not keepends:
- line = line.splitlines(False)[0]
+ line = line.splitlines(keepends=False)[0]
break
line0withend = lines[0]
- line0withoutend = lines[0].splitlines(False)[0]
+ line0withoutend = lines[0].splitlines(keepends=False)[0]
if line0withend != line0withoutend: # We really have a line end
# Put the rest back together and keep it until the next call
self.charbuffer = self._empty_charbuffer.join(lines[1:]) + \
@@ -575,7 +575,7 @@ class StreamReader(Codec):
# we didn't get anything or this was our only try
if not data or size is not None:
if line and not keepends:
- line = line.splitlines(False)[0]
+ line = line.splitlines(keepends=False)[0]
break
if readsize < 8000:
readsize *= 2
@@ -803,7 +803,7 @@ class StreamRecoder:
data = self.reader.read()
data, bytesencoded = self.encode(data, self.errors)
- return data.splitlines(1)
+ return data.splitlines(keepends=True)
def __next__(self):
@@ -1042,10 +1042,7 @@ def make_identity_dict(rng):
mapped to themselves.
"""
- res = {}
- for i in rng:
- res[i]=i
- return res
+ return {i:i for i in rng}
def make_encoding_map(decoding_map):
diff --git a/Lib/collections.py b/Lib/collections/__init__.py
index eb20243..e5f9599 100644
--- a/Lib/collections.py
+++ b/Lib/collections/__init__.py
@@ -1,13 +1,14 @@
__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
- 'UserString', 'Counter', 'OrderedDict']
-# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
-# They should however be considered an integral part of collections.py.
-from _abcoll import *
-import _abcoll
-__all__ += _abcoll.__all__
+ 'UserString', 'Counter', 'OrderedDict', 'ChainMap']
+
+# For backwards compatibility, continue to make the collections ABCs
+# available through the collections module.
+from collections.abc import *
+import collections.abc
+__all__ += collections.abc.__all__
from _collections import deque, defaultdict
-from operator import itemgetter as _itemgetter
+from operator import itemgetter as _itemgetter, eq as _eq
from keyword import iskeyword as _iskeyword
import sys as _sys
import heapq as _heapq
@@ -228,7 +229,7 @@ class OrderedDict(dict):
'''
if isinstance(other, OrderedDict):
return len(self)==len(other) and \
- all(p==q for p, q in zip(self.items(), other.items()))
+ all(map(_eq, self.items(), other.items()))
return dict.__eq__(self, other)
@@ -314,10 +315,10 @@ def namedtuple(typename, field_names, verbose=False, rename=False):
"""
- # Parse and validate the field names. Validation serves two purposes,
- # generating informative error messages and preventing template injection attacks.
+ # Validate the field names. At the user's option, either generate an error
+ # message or automatically replace the field name with a valid name.
if isinstance(field_names, str):
- field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
+ field_names = field_names.replace(',', ' ').split()
field_names = list(map(str, field_names))
if rename:
seen = set()
@@ -332,15 +333,19 @@ def namedtuple(typename, field_names, verbose=False, rename=False):
seen.add(name)
for name in [typename] + field_names:
if not all(c.isalnum() or c=='_' for c in name):
- raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
+ raise ValueError('Type names and field names can only contain '
+ 'alphanumeric characters and underscores: %r' % name)
if _iskeyword(name):
- raise ValueError('Type names and field names cannot be a keyword: %r' % name)
+ raise ValueError('Type names and field names cannot be a '
+ 'keyword: %r' % name)
if name[0].isdigit():
- raise ValueError('Type names and field names cannot start with a number: %r' % name)
+ raise ValueError('Type names and field names cannot start with '
+ 'a number: %r' % name)
seen = set()
for name in field_names:
if name.startswith('_') and not rename:
- raise ValueError('Field names cannot start with an underscore: %r' % name)
+ raise ValueError('Field names cannot start with an underscore: '
+ '%r' % name)
if name in seen:
raise ValueError('Encountered duplicate field name: %r' % name)
seen.add(name)
@@ -351,21 +356,23 @@ def namedtuple(typename, field_names, verbose=False, rename=False):
field_names = tuple(field_names),
num_fields = len(field_names),
arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
- repr_fmt = ', '.join(_repr_template.format(name=name) for name in field_names),
+ repr_fmt = ', '.join(_repr_template.format(name=name)
+ for name in field_names),
field_defs = '\n'.join(_field_template.format(index=index, name=name)
for index, name in enumerate(field_names))
)
- # Execute the template string in a temporary namespace and
- # support tracing utilities by setting a value for frame.f_globals['__name__']
+ # Execute the template string in a temporary namespace and support
+ # tracing utilities by setting a value for frame.f_globals['__name__']
namespace = dict(__name__='namedtuple_%s' % typename)
try:
exec(class_definition, namespace)
except SyntaxError as e:
raise SyntaxError(e.msg + ':\n\n' + class_definition)
result = namespace[typename]
+ result._source = class_definition
if verbose:
- print(class_definition)
+ print(result._source)
# For pickling to work, the __module__ variable needs to be set to the frame
# where the named tuple is created. Bypass this step in enviroments where
@@ -674,12 +681,86 @@ class Counter(dict):
result[elem] = newcount
return result
+ def __pos__(self):
+ 'Adds an empty counter, effectively stripping negative and zero counts'
+ return self + Counter()
+
+ def __neg__(self):
+ '''Subtracts from an empty counter. Strips positive and zero counts,
+ and flips the sign on negative counts.
+
+ '''
+ return Counter() - self
+
+ def _keep_positive(self):
+ '''Internal method to strip elements with a negative or zero count'''
+ nonpositive = [elem for elem, count in self.items() if not count > 0]
+ for elem in nonpositive:
+ del self[elem]
+ return self
+
+ def __iadd__(self, other):
+ '''Inplace add from another counter, keeping only positive counts.
+
+ >>> c = Counter('abbb')
+ >>> c += Counter('bcc')
+ >>> c
+ Counter({'b': 4, 'c': 2, 'a': 1})
+
+ '''
+ for elem, count in other.items():
+ self[elem] += count
+ return self._keep_positive()
+
+ def __isub__(self, other):
+ '''Inplace subtract counter, but keep only results with positive counts.
+
+ >>> c = Counter('abbbc')
+ >>> c -= Counter('bccd')
+ >>> c
+ Counter({'b': 2, 'a': 1})
+
+ '''
+ for elem, count in other.items():
+ self[elem] -= count
+ return self._keep_positive()
+
+ def __ior__(self, other):
+ '''Inplace union is the maximum of value from either counter.
+
+ >>> c = Counter('abbb')
+ >>> c |= Counter('bcc')
+ >>> c
+ Counter({'b': 3, 'c': 2, 'a': 1})
+
+ '''
+ for elem, other_count in other.items():
+ count = self[elem]
+ if other_count > count:
+ self[elem] = other_count
+ return self._keep_positive()
+
+ def __iand__(self, other):
+ '''Inplace intersection is the minimum of corresponding counts.
+
+ >>> c = Counter('abbb')
+ >>> c &= Counter('bcc')
+ >>> c
+ Counter({'b': 1})
+
+ '''
+ for elem, count in self.items():
+ other_count = other[elem]
+ if other_count < count:
+ self[elem] = other_count
+ return self._keep_positive()
+
########################################################################
-### ChainMap (helper for configparser)
+### ChainMap (helper for configparser and string.Template)
########################################################################
-class _ChainMap(MutableMapping):
+class ChainMap(MutableMapping):
''' A ChainMap groups multiple dicts (or other mappings) together
to create a single, updateable view.
@@ -890,6 +971,8 @@ class UserList(MutableSequence):
def insert(self, i, item): self.data.insert(i, item)
def pop(self, i=-1): return self.data.pop(i)
def remove(self, item): self.data.remove(item)
+ def clear(self): self.data.clear()
+ def copy(self): return self.__class__(self)
def count(self, item): return self.data.count(item)
def index(self, item, *args): return self.data.index(item, *args)
def reverse(self): self.data.reverse()
@@ -1034,7 +1117,7 @@ class UserString(Sequence):
return self.data.split(sep, maxsplit)
def rsplit(self, sep=None, maxsplit=-1):
return self.data.rsplit(sep, maxsplit)
- def splitlines(self, keepends=0): return self.data.splitlines(keepends)
+ def splitlines(self, keepends=False): return self.data.splitlines(keepends)
def startswith(self, prefix, start=0, end=_sys.maxsize):
return self.data.startswith(prefix, start, end)
def strip(self, chars=None): return self.__class__(self.data.strip(chars))
@@ -1044,44 +1127,3 @@ class UserString(Sequence):
return self.__class__(self.data.translate(*args))
def upper(self): return self.__class__(self.data.upper())
def zfill(self, width): return self.__class__(self.data.zfill(width))
-
-
-
-################################################################################
-### Simple tests
-################################################################################
-
-if __name__ == '__main__':
- # verify that instances can be pickled
- from pickle import loads, dumps
- Point = namedtuple('Point', 'x, y', True)
- p = Point(x=10, y=20)
- assert p == loads(dumps(p))
-
- # test and demonstrate ability to override methods
- class Point(namedtuple('Point', 'x y')):
- __slots__ = ()
- @property
- def hypot(self):
- return (self.x ** 2 + self.y ** 2) ** 0.5
- def __str__(self):
- return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
-
- for p in Point(3, 4), Point(14, 5/7.):
- print (p)
-
- class Point(namedtuple('Point', 'x y')):
- 'Point class with optimized _make() and _replace() without error-checking'
- __slots__ = ()
- _make = classmethod(tuple.__new__)
- def _replace(self, _map=map, **kwds):
- return self._make(_map(kwds.get, ('x', 'y'), self))
-
- print(Point(11, 22)._replace(x=100))
-
- Point3D = namedtuple('Point3D', Point._fields + ('z',))
- print(Point3D.__doc__)
-
- import doctest
- TestResults = namedtuple('TestResults', 'failed attempted')
- print(TestResults(*doctest.testmod()))
diff --git a/Lib/collections/__main__.py b/Lib/collections/__main__.py
new file mode 100644
index 0000000..763e38e
--- /dev/null
+++ b/Lib/collections/__main__.py
@@ -0,0 +1,38 @@
+################################################################################
+### Simple tests
+################################################################################
+
+# verify that instances can be pickled
+from collections import namedtuple
+from pickle import loads, dumps
+Point = namedtuple('Point', 'x, y', True)
+p = Point(x=10, y=20)
+assert p == loads(dumps(p))
+
+# test and demonstrate ability to override methods
+class Point(namedtuple('Point', 'x y')):
+ __slots__ = ()
+ @property
+ def hypot(self):
+ return (self.x ** 2 + self.y ** 2) ** 0.5
+ def __str__(self):
+ return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
+
+for p in Point(3, 4), Point(14, 5/7.):
+ print (p)
+
+class Point(namedtuple('Point', 'x y')):
+ 'Point class with optimized _make() and _replace() without error-checking'
+ __slots__ = ()
+ _make = classmethod(tuple.__new__)
+ def _replace(self, _map=map, **kwds):
+ return self._make(_map(kwds.get, ('x', 'y'), self))
+
+print(Point(11, 22)._replace(x=100))
+
+Point3D = namedtuple('Point3D', Point._fields + ('z',))
+print(Point3D.__doc__)
+
+import doctest, collections
+TestResults = namedtuple('TestResults', 'failed attempted')
+print(TestResults(*doctest.testmod(collections)))
diff --git a/Lib/_abcoll.py b/Lib/collections/abc.py
index 2417d18..d17cfdc 100644
--- a/Lib/_abcoll.py
+++ b/Lib/collections/abc.py
@@ -3,9 +3,7 @@
"""Abstract Base Classes (ABCs) for collections, according to PEP 3119.
-DON'T USE THIS MODULE DIRECTLY! The classes here should be imported
-via collections; they are defined here only to alleviate certain
-bootstrapping issues. Unit tests are in test_collections.
+Unit tests are in test_collections.
"""
from abc import ABCMeta, abstractmethod
@@ -20,9 +18,13 @@ __all__ = ["Hashable", "Iterable", "Iterator",
"ByteString",
]
-
-### collection related types which are not exposed through builtin ###
-## iterators ##
+# Private list of types that we want to register with the various ABCs
+# so that they will pass tests like:
+# it = iter(somebytearray)
+# assert isinstance(it, Iterable)
+# Note: in other implementations, these types may not be distinct
+# and they may have their own implementation-specific types that
+# are not included on this list.
bytes_iterator = type(iter(b''))
bytearray_iterator = type(iter(bytearray()))
#callable_iterator = ???
@@ -41,13 +43,15 @@ dict_keys = type({}.keys())
dict_values = type({}.values())
dict_items = type({}.items())
## misc ##
-dict_proxy = type(type.__dict__)
+mappingproxy = type(type.__dict__)
### ONE-TRICK PONIES ###
class Hashable(metaclass=ABCMeta):
+ __slots__ = ()
+
@abstractmethod
def __hash__(self):
return 0
@@ -65,6 +69,8 @@ class Hashable(metaclass=ABCMeta):
class Iterable(metaclass=ABCMeta):
+ __slots__ = ()
+
@abstractmethod
def __iter__(self):
while False:
@@ -80,6 +86,8 @@ class Iterable(metaclass=ABCMeta):
class Iterator(Iterable):
+ __slots__ = ()
+
@abstractmethod
def __next__(self):
raise StopIteration
@@ -111,6 +119,8 @@ Iterator.register(zip_iterator)
class Sized(metaclass=ABCMeta):
+ __slots__ = ()
+
@abstractmethod
def __len__(self):
return 0
@@ -125,6 +135,8 @@ class Sized(metaclass=ABCMeta):
class Container(metaclass=ABCMeta):
+ __slots__ = ()
+
@abstractmethod
def __contains__(self, x):
return False
@@ -139,6 +151,8 @@ class Container(metaclass=ABCMeta):
class Callable(metaclass=ABCMeta):
+ __slots__ = ()
+
@abstractmethod
def __call__(self, *args, **kwds):
return False
@@ -166,6 +180,8 @@ class Set(Sized, Iterable, Container):
then the other operations will automatically follow suit.
"""
+ __slots__ = ()
+
def __le__(self, other):
if not isinstance(other, Set):
return NotImplemented
@@ -277,6 +293,8 @@ Set.register(frozenset)
class MutableSet(Set):
+ __slots__ = ()
+
@abstractmethod
def add(self, value):
"""Add an element."""
@@ -350,6 +368,8 @@ MutableSet.register(set)
class Mapping(Sized, Iterable, Container):
+ __slots__ = ()
+
@abstractmethod
def __getitem__(self, key):
raise KeyError
@@ -385,6 +405,8 @@ class Mapping(Sized, Iterable, Container):
def __ne__(self, other):
return not (self == other)
+Mapping.register(mappingproxy)
+
class MappingView(Sized):
@@ -453,6 +475,8 @@ ValuesView.register(dict_values)
class MutableMapping(Mapping):
+ __slots__ = ()
+
@abstractmethod
def __setitem__(self, key, value):
raise KeyError
@@ -532,6 +556,8 @@ class Sequence(Sized, Iterable, Container):
__getitem__, and __len__.
"""
+ __slots__ = ()
+
@abstractmethod
def __getitem__(self, index):
raise IndexError
@@ -577,12 +603,16 @@ class ByteString(Sequence):
XXX Should add all their methods.
"""
+ __slots__ = ()
+
ByteString.register(bytes)
ByteString.register(bytearray)
class MutableSequence(Sequence):
+ __slots__ = ()
+
@abstractmethod
def __setitem__(self, index, value):
raise IndexError
@@ -598,6 +628,13 @@ class MutableSequence(Sequence):
def append(self, value):
self.insert(len(self), value)
+ def clear(self):
+ try:
+ while True:
+ self.pop()
+ except IndexError:
+ pass
+
def reverse(self):
n = len(self)
for i in range(n//2):
diff --git a/Lib/concurrent/futures/_base.py b/Lib/concurrent/futures/_base.py
index 9f11f69..1e098be 100644
--- a/Lib/concurrent/futures/_base.py
+++ b/Lib/concurrent/futures/_base.py
@@ -4,7 +4,6 @@
__author__ = 'Brian Quinlan (brian@sweetapp.com)'
import collections
-import functools
import logging
import threading
import time
@@ -471,8 +470,8 @@ class Future(object):
return True
else:
LOGGER.critical('Future %s in unexpected state: %s',
- id(self.future),
- self.future._state)
+ id(self),
+ self._state)
raise RuntimeError('Future in unexpected state')
def set_result(self, result):
@@ -538,15 +537,19 @@ class Executor(object):
fs = [self.submit(fn, *args) for args in zip(*iterables)]
- try:
- for future in fs:
- if timeout is None:
- yield future.result()
- else:
- yield future.result(end_time - time.time())
- finally:
- for future in fs:
- future.cancel()
+ # Yield must be hidden in closure so that the futures are submitted
+ # before the first iterator value is required.
+ def result_iterator():
+ try:
+ for future in fs:
+ if timeout is None:
+ yield future.result()
+ else:
+ yield future.result(end_time - time.time())
+ finally:
+ for future in fs:
+ future.cancel()
+ return result_iterator()
def shutdown(self, wait=True):
"""Clean-up the resources associated with the Executor.
diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py
index d3bbe2c..04238a7 100644
--- a/Lib/concurrent/futures/process.py
+++ b/Lib/concurrent/futures/process.py
@@ -46,9 +46,12 @@ Process #1..n:
__author__ = 'Brian Quinlan (brian@sweetapp.com)'
import atexit
+import os
from concurrent.futures import _base
import queue
import multiprocessing
+from multiprocessing.queues import SimpleQueue, Full
+from multiprocessing.connection import wait
import threading
import weakref
@@ -121,7 +124,7 @@ def _process_worker(call_queue, result_queue):
call_item = call_queue.get(block=True)
if call_item is None:
# Wake up queue management thread
- result_queue.put(None)
+ result_queue.put(os.getpid())
return
try:
r = call_item.fn(*call_item.args, **call_item.kwargs)
@@ -193,46 +196,92 @@ def _queue_management_worker(executor_reference,
result_queue: A multiprocessing.Queue of _ResultItems generated by the
process workers.
"""
- nb_shutdown_processes = 0
- def shutdown_one_process():
- """Tell a worker to terminate, which will in turn wake us again"""
- nonlocal nb_shutdown_processes
- call_queue.put(None)
- nb_shutdown_processes += 1
+ executor = None
+
+ def shutting_down():
+ return _shutdown or executor is None or executor._shutdown_thread
+
+ def shutdown_worker():
+ # This is an upper bound
+ nb_children_alive = sum(p.is_alive() for p in processes.values())
+ for i in range(0, nb_children_alive):
+ call_queue.put_nowait(None)
+ # Release the queue's resources as soon as possible.
+ call_queue.close()
+ # If .join() is not called on the created processes then
+ # some multiprocessing.Queue methods may deadlock on Mac OS X.
+ for p in processes.values():
+ p.join()
+
+ reader = result_queue._reader
+
while True:
_add_call_item_to_queue(pending_work_items,
work_ids_queue,
call_queue)
- result_item = result_queue.get(block=True)
- if result_item is not None:
- work_item = pending_work_items[result_item.work_id]
- del pending_work_items[result_item.work_id]
-
- if result_item.exception:
- work_item.future.set_exception(result_item.exception)
- else:
- work_item.future.set_result(result_item.result)
+ sentinels = [p.sentinel for p in processes.values()]
+ assert sentinels
+ ready = wait([reader] + sentinels)
+ if reader in ready:
+ result_item = reader.recv()
+ else:
+ # Mark the process pool broken so that submits fail right now.
+ executor = executor_reference()
+ if executor is not None:
+ executor._broken = True
+ executor._shutdown_thread = True
+ executor = None
+ # All futures in flight must be marked failed
+ for work_id, work_item in pending_work_items.items():
+ work_item.future.set_exception(
+ BrokenProcessPool(
+ "A process in the process pool was "
+ "terminated abruptly while the future was "
+ "running or pending."
+ ))
+ pending_work_items.clear()
+ # Terminate remaining workers forcibly: the queues or their
+ # locks may be in a dirty state and block forever.
+ for p in processes.values():
+ p.terminate()
+ shutdown_worker()
+ return
+ if isinstance(result_item, int):
+ # Clean shutdown of a worker using its PID
+ # (avoids marking the executor broken)
+ assert shutting_down()
+ p = processes.pop(result_item)
+ p.join()
+ if not processes:
+ shutdown_worker()
+ return
+ elif result_item is not None:
+ work_item = pending_work_items.pop(result_item.work_id, None)
+ # work_item can be None if another process terminated (see above)
+ if work_item is not None:
+ if result_item.exception:
+ work_item.future.set_exception(result_item.exception)
+ else:
+ work_item.future.set_result(result_item.result)
# Check whether we should start shutting down.
executor = executor_reference()
# No more work items can be added if:
# - The interpreter is shutting down OR
# - The executor that owns this worker has been collected OR
# - The executor that owns this worker has been shutdown.
- if _shutdown or executor is None or executor._shutdown_thread:
- # Since no new work items can be added, it is safe to shutdown
- # this thread if there are no pending work items.
- if not pending_work_items:
- while nb_shutdown_processes < len(processes):
- shutdown_one_process()
- # If .join() is not called on the created processes then
- # some multiprocessing.Queue methods may deadlock on Mac OS
- # X.
- for p in processes:
- p.join()
- call_queue.close()
- return
- del executor
+ if shutting_down():
+ try:
+ # Since no new work items can be added, it is safe to shutdown
+ # this thread if there are no pending work items.
+ if not pending_work_items:
+ shutdown_worker()
+ return
+ except Full:
+ # This is not a problem: we will eventually be woken up (in
+ # result_queue.get()) and be able to send a sentinel again.
+ pass
+ executor = None
_system_limits_checked = False
_system_limited = None
@@ -243,7 +292,6 @@ def _check_system_limits():
raise NotImplementedError(_system_limited)
_system_limits_checked = True
try:
- import os
nsems_max = os.sysconf("SC_SEM_NSEMS_MAX")
except (AttributeError, ValueError):
# sysconf not available or setting not available
@@ -259,6 +307,14 @@ def _check_system_limits():
_system_limited = "system provides too few semaphores (%d available, 256 necessary)" % nsems_max
raise NotImplementedError(_system_limited)
+
+class BrokenProcessPool(RuntimeError):
+ """
+ Raised when a process in a ProcessPoolExecutor terminated abruptly
+ while a future was in the running state.
+ """
+
+
class ProcessPoolExecutor(_base.Executor):
def __init__(self, max_workers=None):
"""Initializes a new ProcessPoolExecutor instance.
@@ -280,14 +336,20 @@ class ProcessPoolExecutor(_base.Executor):
# because futures in the call queue cannot be cancelled.
self._call_queue = multiprocessing.Queue(self._max_workers +
EXTRA_QUEUED_CALLS)
- self._result_queue = multiprocessing.Queue()
+ # Killed worker processes can produce spurious "broken pipe"
+ # tracebacks in the queue's own worker thread. But we detect killed
+ # processes anyway, so silence the tracebacks.
+ self._call_queue._ignore_epipe = True
+ self._result_queue = SimpleQueue()
self._work_ids = queue.Queue()
self._queue_management_thread = None
- self._processes = set()
+ # Map of pids to processes
+ self._processes = {}
# Shutdown is a two-step process.
self._shutdown_thread = False
self._shutdown_lock = threading.Lock()
+ self._broken = False
self._queue_count = 0
self._pending_work_items = {}
@@ -297,6 +359,8 @@ class ProcessPoolExecutor(_base.Executor):
def weakref_cb(_, q=self._result_queue):
q.put(None)
if self._queue_management_thread is None:
+ # Start the processes so that their sentinels are known.
+ self._adjust_process_count()
self._queue_management_thread = threading.Thread(
target=_queue_management_worker,
args=(weakref.ref(self, weakref_cb),
@@ -316,10 +380,13 @@ class ProcessPoolExecutor(_base.Executor):
args=(self._call_queue,
self._result_queue))
p.start()
- self._processes.add(p)
+ self._processes[p.pid] = p
def submit(self, fn, *args, **kwargs):
with self._shutdown_lock:
+ if self._broken:
+ raise BrokenProcessPool('A child process terminated '
+ 'abruptly, the process pool is not usable anymore')
if self._shutdown_thread:
raise RuntimeError('cannot schedule new futures after shutdown')
@@ -333,7 +400,6 @@ class ProcessPoolExecutor(_base.Executor):
self._result_queue.put(None)
self._start_queue_management_thread()
- self._adjust_process_count()
return f
submit.__doc__ = _base.Executor.submit.__doc__
diff --git a/Lib/concurrent/futures/thread.py b/Lib/concurrent/futures/thread.py
index fbac088..95bb682 100644
--- a/Lib/concurrent/futures/thread.py
+++ b/Lib/concurrent/futures/thread.py
@@ -74,7 +74,7 @@ def _worker(executor_reference, work_queue):
work_queue.put(None)
return
del executor
- except BaseException as e:
+ except BaseException:
_base.LOGGER.critical('Exception in worker', exc_info=True)
class ThreadPoolExecutor(_base.Executor):
diff --git a/Lib/configparser.py b/Lib/configparser.py
index d148b88..9d5f779 100644
--- a/Lib/configparser.py
+++ b/Lib/configparser.py
@@ -119,7 +119,8 @@ ConfigParser -- responsible for parsing a list of
between keys and values are surrounded by spaces.
"""
-from collections import MutableMapping, OrderedDict as _default_dict, _ChainMap
+from collections.abc import MutableMapping
+from collections import OrderedDict as _default_dict, ChainMap as _ChainMap
import functools
import io
import itertools
@@ -992,18 +993,26 @@ class RawConfigParser(MutableMapping):
indent_level = 0
e = None # None, or an exception
for lineno, line in enumerate(fp, start=1):
- comment_start = None
+ comment_start = sys.maxsize
# strip inline comments
- for prefix in self._inline_comment_prefixes:
- index = line.find(prefix)
- if index == 0 or (index > 0 and line[index-1].isspace()):
- comment_start = index
- break
+ inline_prefixes = {p: -1 for p in self._inline_comment_prefixes}
+ while comment_start == sys.maxsize and inline_prefixes:
+ next_prefixes = {}
+ for prefix, index in inline_prefixes.items():
+ index = line.find(prefix, index+1)
+ if index == -1:
+ continue
+ next_prefixes[prefix] = index
+ if index == 0 or (index > 0 and line[index-1].isspace()):
+ comment_start = min(comment_start, index)
+ inline_prefixes = next_prefixes
# strip full line comments
for prefix in self._comment_prefixes:
if line.strip().startswith(prefix):
comment_start = 0
break
+ if comment_start == sys.maxsize:
+ comment_start = None
value = line[:comment_start].strip()
if not value:
if self._empty_lines_in_values:
diff --git a/Lib/contextlib.py b/Lib/contextlib.py
index 5ebbbc6..bde2feb 100644
--- a/Lib/contextlib.py
+++ b/Lib/contextlib.py
@@ -1,10 +1,10 @@
"""Utilities for with-statement contexts. See PEP 343."""
import sys
+from collections import deque
from functools import wraps
-from warnings import warn
-__all__ = ["contextmanager", "closing", "ContextDecorator"]
+__all__ = ["contextmanager", "closing", "ContextDecorator", "ExitStack"]
class ContextDecorator(object):
@@ -13,12 +13,12 @@ class ContextDecorator(object):
def _recreate_cm(self):
"""Return a recreated instance of self.
- Allows otherwise one-shot context managers like
+ Allows an otherwise one-shot context manager like
_GeneratorContextManager to support use as
- decorators via implicit recreation.
+ a decorator via implicit recreation.
- Note: this is a private interface just for _GCM in 3.2 but will be
- renamed and documented for third party use in 3.3
+ This is a private interface just for _GeneratorContextManager.
+ See issue #11647 for details.
"""
return self
@@ -139,3 +139,117 @@ class closing(object):
return self.thing
def __exit__(self, *exc_info):
self.thing.close()
+
+
+# Inspired by discussions on http://bugs.python.org/issue13585
+class ExitStack(object):
+ """Context manager for dynamic management of a stack of exit callbacks
+
+ For example:
+
+ with ExitStack() as stack:
+ files = [stack.enter_context(open(fname)) for fname in filenames]
+ # All opened files will automatically be closed at the end of
+ # the with statement, even if attempts to open files later
+ # in the list throw an exception
+
+ """
+ def __init__(self):
+ self._exit_callbacks = deque()
+
+ def pop_all(self):
+ """Preserve the context stack by transferring it to a new instance"""
+ new_stack = type(self)()
+ new_stack._exit_callbacks = self._exit_callbacks
+ self._exit_callbacks = deque()
+ return new_stack
+
+ def _push_cm_exit(self, cm, cm_exit):
+ """Helper to correctly register callbacks to __exit__ methods"""
+ def _exit_wrapper(*exc_details):
+ return cm_exit(cm, *exc_details)
+ _exit_wrapper.__self__ = cm
+ self.push(_exit_wrapper)
+
+ def push(self, exit):
+ """Registers a callback with the standard __exit__ method signature
+
+ Can suppress exceptions the same way __exit__ methods can.
+
+ Also accepts any object with an __exit__ method (registering a call
+ to the method instead of the object itself)
+ """
+ # We use an unbound method rather than a bound method to follow
+ # the standard lookup behaviour for special methods
+ _cb_type = type(exit)
+ try:
+ exit_method = _cb_type.__exit__
+ except AttributeError:
+ # Not a context manager, so assume it's a callable
+ self._exit_callbacks.append(exit)
+ else:
+ self._push_cm_exit(exit, exit_method)
+ return exit # Allow use as a decorator
+
+ def callback(self, callback, *args, **kwds):
+ """Registers an arbitrary callback and arguments.
+
+ Cannot suppress exceptions.
+ """
+ def _exit_wrapper(exc_type, exc, tb):
+ callback(*args, **kwds)
+ # We changed the signature, so using @wraps is not appropriate, but
+ # setting __wrapped__ may still help with introspection
+ _exit_wrapper.__wrapped__ = callback
+ self.push(_exit_wrapper)
+ return callback # Allow use as a decorator
+
+ def enter_context(self, cm):
+ """Enters the supplied context manager
+
+ If successful, also pushes its __exit__ method as a callback and
+ returns the result of the __enter__ method.
+ """
+ # We look up the special methods on the type to match the with statement
+ _cm_type = type(cm)
+ _exit = _cm_type.__exit__
+ result = _cm_type.__enter__(cm)
+ self._push_cm_exit(cm, _exit)
+ return result
+
+ def close(self):
+ """Immediately unwind the context stack"""
+ self.__exit__(None, None, None)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *exc_details):
+ # We manipulate the exception state so it behaves as though
+ # we were actually nesting multiple with statements
+ frame_exc = sys.exc_info()[1]
+ def _fix_exception_context(new_exc, old_exc):
+ while 1:
+ exc_context = new_exc.__context__
+ if exc_context in (None, frame_exc):
+ break
+ new_exc = exc_context
+ new_exc.__context__ = old_exc
+
+ # Callbacks are invoked in LIFO order to match the behaviour of
+ # nested context managers
+ suppressed_exc = False
+ while self._exit_callbacks:
+ cb = self._exit_callbacks.pop()
+ try:
+ if cb(*exc_details):
+ suppressed_exc = True
+ exc_details = (None, None, None)
+ except:
+ new_exc_details = sys.exc_info()
+ # simulate the stack of exceptions by setting the context
+ _fix_exception_context(new_exc_details[1], exc_details[1])
+ if not self._exit_callbacks:
+ raise
+ exc_details = new_exc_details
+ return suppressed_exc
diff --git a/Lib/copy.py b/Lib/copy.py
index 089d101..d96201e 100644
--- a/Lib/copy.py
+++ b/Lib/copy.py
@@ -173,8 +173,10 @@ def deepcopy(x, memo=None, _nil=[]):
"un(deep)copyable object of type %s" % cls)
y = _reconstruct(x, rv, 1, memo)
- memo[d] = y
- _keep_alive(x, memo) # Make sure x lives at least as long as d
+ # If x is its own copy, don't memoize.
+ if y is not x:
+ memo[d] = y
+ _keep_alive(x, memo) # Make sure x lives at least as long as d
return y
_deepcopy_dispatch = d = {}
@@ -214,9 +216,10 @@ def _deepcopy_tuple(x, memo):
y = []
for a in x:
y.append(deepcopy(a, memo))
- d = id(x)
+ # We're not going to put the tuple in the memo, but it's still important we
+ # check for it, in case the tuple contains recursive mutable structures.
try:
- return memo[d]
+ return memo[id(x)]
except KeyError:
pass
for i in range(len(x)):
@@ -225,7 +228,6 @@ def _deepcopy_tuple(x, memo):
break
else:
y = x
- memo[d] = y
return y
d[tuple] = _deepcopy_tuple
@@ -321,68 +323,3 @@ del types
# Helper for instance creation without calling __init__
class _EmptyClass:
pass
-
-def _test():
- l = [None, 1, 2, 3.14, 'xyzzy', (1, 2), [3.14, 'abc'],
- {'abc': 'ABC'}, (), [], {}]
- l1 = copy(l)
- print(l1==l)
- l1 = map(copy, l)
- print(l1==l)
- l1 = deepcopy(l)
- print(l1==l)
- class C:
- def __init__(self, arg=None):
- self.a = 1
- self.arg = arg
- if __name__ == '__main__':
- import sys
- file = sys.argv[0]
- else:
- file = __file__
- self.fp = open(file)
- self.fp.close()
- def __getstate__(self):
- return {'a': self.a, 'arg': self.arg}
- def __setstate__(self, state):
- for key, value in state.items():
- setattr(self, key, value)
- def __deepcopy__(self, memo=None):
- new = self.__class__(deepcopy(self.arg, memo))
- new.a = self.a
- return new
- c = C('argument sketch')
- l.append(c)
- l2 = copy(l)
- print(l == l2)
- print(l)
- print(l2)
- l2 = deepcopy(l)
- print(l == l2)
- print(l)
- print(l2)
- l.append({l[1]: l, 'xyz': l[2]})
- l3 = copy(l)
- import reprlib
- print(map(reprlib.repr, l))
- print(map(reprlib.repr, l1))
- print(map(reprlib.repr, l2))
- print(map(reprlib.repr, l3))
- l3 = deepcopy(l)
- print(map(reprlib.repr, l))
- print(map(reprlib.repr, l1))
- print(map(reprlib.repr, l2))
- print(map(reprlib.repr, l3))
- class odict(dict):
- def __init__(self, d = {}):
- self.a = 99
- dict.__init__(self, d)
- def __setitem__(self, k, i):
- dict.__setitem__(self, k, i)
- self.a
- o = odict({"A" : "B"})
- x = deepcopy(o)
- print(o, x)
-
-if __name__ == '__main__':
- _test()
diff --git a/Lib/crypt.py b/Lib/crypt.py
new file mode 100644
index 0000000..b90c81c
--- /dev/null
+++ b/Lib/crypt.py
@@ -0,0 +1,62 @@
+"""Wrapper to the POSIX crypt library call and associated functionality."""
+
+import _crypt
+import string as _string
+from random import SystemRandom as _SystemRandom
+from collections import namedtuple as _namedtuple
+
+
+_saltchars = _string.ascii_letters + _string.digits + './'
+_sr = _SystemRandom()
+
+
+class _Method(_namedtuple('_Method', 'name ident salt_chars total_size')):
+
+ """Class representing a salt method per the Modular Crypt Format or the
+ legacy 2-character crypt method."""
+
+ def __repr__(self):
+ return '<crypt.METHOD_{}>'.format(self.name)
+
+
+def mksalt(method=None):
+ """Generate a salt for the specified method.
+
+ If not specified, the strongest available method will be used.
+
+ """
+ if method is None:
+ method = methods[0]
+ s = '${}$'.format(method.ident) if method.ident else ''
+ s += ''.join(_sr.sample(_saltchars, method.salt_chars))
+ return s
+
+
+def crypt(word, salt=None):
+ """Return a string representing the one-way hash of a password, with a salt
+ prepended.
+
+ If ``salt`` is not specified or is ``None``, the strongest
+ available method will be selected and a salt generated. Otherwise,
+ ``salt`` may be one of the ``crypt.METHOD_*`` values, or a string as
+ returned by ``crypt.mksalt()``.
+
+ """
+ if salt is None or isinstance(salt, _Method):
+ salt = mksalt(salt)
+ return _crypt.crypt(word, salt)
+
+
+# available salting/crypto methods
+METHOD_CRYPT = _Method('CRYPT', None, 2, 13)
+METHOD_MD5 = _Method('MD5', '1', 8, 34)
+METHOD_SHA256 = _Method('SHA256', '5', 16, 63)
+METHOD_SHA512 = _Method('SHA512', '6', 16, 106)
+
+methods = []
+for _method in (METHOD_SHA512, METHOD_SHA256, METHOD_MD5):
+ _result = crypt('', _method)
+ if _result and len(_result) == _method.total_size:
+ methods.append(_method)
+methods.append(METHOD_CRYPT)
+del _result, _method
diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py
index 111209a..c92e130 100644
--- a/Lib/ctypes/__init__.py
+++ b/Lib/ctypes/__init__.py
@@ -26,7 +26,7 @@ if _os.name == "posix" and _sys.platform == "darwin":
# libraries. OS X 10.3 is Darwin 7, so we check for
# that.
- if int(_os.uname()[2].split('.')[0]) < 8:
+ if int(_os.uname().release.split('.')[0]) < 8:
DEFAULT_MODE = RTLD_GLOBAL
from _ctypes import FUNCFLAG_CDECL as _FUNCFLAG_CDECL, \
@@ -456,7 +456,7 @@ if _os.name in ("nt", "ce"):
code = GetLastError()
if descr is None:
descr = FormatError(code).strip()
- return WindowsError(code, descr)
+ return WindowsError(None, descr, None, code)
if sizeof(c_uint) == sizeof(c_void_p):
c_size_t = c_uint
diff --git a/Lib/ctypes/test/test_callbacks.py b/Lib/ctypes/test/test_callbacks.py
index c7207ea..5600b43 100644
--- a/Lib/ctypes/test/test_callbacks.py
+++ b/Lib/ctypes/test/test_callbacks.py
@@ -140,7 +140,7 @@ class Callbacks(unittest.TestCase):
def __del__(self):
gc.collect()
CFUNCTYPE(None)(lambda x=Nasty(): None)
-
+
try:
WINFUNCTYPE
diff --git a/Lib/ctypes/test/test_memfunctions.py b/Lib/ctypes/test/test_memfunctions.py
index aa2113b..aec4aaa 100644
--- a/Lib/ctypes/test/test_memfunctions.py
+++ b/Lib/ctypes/test/test_memfunctions.py
@@ -1,4 +1,5 @@
import sys
+from test import support
import unittest
from ctypes import *
@@ -49,6 +50,7 @@ class MemFunctionsTest(unittest.TestCase):
self.assertEqual(cast(a, POINTER(c_byte))[:7:7],
[97])
+ @support.refcount_test
def test_string_at(self):
s = string_at(b"foo bar")
# XXX The following may be wrong, depending on how Python
diff --git a/Lib/ctypes/test/test_parameters.py b/Lib/ctypes/test/test_parameters.py
index e83fd9a..9762fb9 100644
--- a/Lib/ctypes/test/test_parameters.py
+++ b/Lib/ctypes/test/test_parameters.py
@@ -73,13 +73,10 @@ class SimpleTypesTestCase(unittest.TestCase):
except ImportError:
## print "(No c_wchar_p)"
return
- s = "123"
- if sys.platform == "win32":
- self.assertTrue(c_wchar_p.from_param(s)._obj is s)
- self.assertRaises(TypeError, c_wchar_p.from_param, 42)
- # new in 0.9.1: convert (decode) ascii to unicode
- self.assertEqual(c_wchar_p.from_param("123")._obj, "123")
+ c_wchar_p.from_param("123")
+
+ self.assertRaises(TypeError, c_wchar_p.from_param, 42)
self.assertRaises(TypeError, c_wchar_p.from_param, b"123\377")
pa = c_wchar_p.from_param(c_wchar_p("123"))
diff --git a/Lib/ctypes/test/test_pep3118.py b/Lib/ctypes/test/test_pep3118.py
index fa6461f..ad13b01 100644
--- a/Lib/ctypes/test/test_pep3118.py
+++ b/Lib/ctypes/test/test_pep3118.py
@@ -25,14 +25,17 @@ class Test(unittest.TestCase):
v = memoryview(ob)
try:
self.assertEqual(normalize(v.format), normalize(fmt))
- if shape is not None:
+ if shape:
self.assertEqual(len(v), shape[0])
else:
self.assertEqual(len(v) * sizeof(itemtp), sizeof(ob))
self.assertEqual(v.itemsize, sizeof(itemtp))
self.assertEqual(v.shape, shape)
- # ctypes object always have a non-strided memory block
- self.assertEqual(v.strides, None)
+ # XXX Issue #12851: PyCData_NewGetBuffer() must provide strides
+ # if requested. memoryview currently reconstructs missing
+ # stride information, so this assert will fail.
+ # self.assertEqual(v.strides, ())
+
# they are always read/write
self.assertFalse(v.readonly)
@@ -52,14 +55,15 @@ class Test(unittest.TestCase):
v = memoryview(ob)
try:
self.assertEqual(v.format, fmt)
- if shape is not None:
+ if shape:
self.assertEqual(len(v), shape[0])
else:
self.assertEqual(len(v) * sizeof(itemtp), sizeof(ob))
self.assertEqual(v.itemsize, sizeof(itemtp))
self.assertEqual(v.shape, shape)
- # ctypes object always have a non-strided memory block
- self.assertEqual(v.strides, None)
+ # XXX Issue #12851
+ # self.assertEqual(v.strides, ())
+
# they are always read/write
self.assertFalse(v.readonly)
@@ -110,34 +114,34 @@ native_types = [
## simple types
- (c_char, "<c", None, c_char),
- (c_byte, "<b", None, c_byte),
- (c_ubyte, "<B", None, c_ubyte),
- (c_short, "<h", None, c_short),
- (c_ushort, "<H", None, c_ushort),
+ (c_char, "<c", (), c_char),
+ (c_byte, "<b", (), c_byte),
+ (c_ubyte, "<B", (), c_ubyte),
+ (c_short, "<h", (), c_short),
+ (c_ushort, "<H", (), c_ushort),
# c_int and c_uint may be aliases to c_long
- #(c_int, "<i", None, c_int),
- #(c_uint, "<I", None, c_uint),
+ #(c_int, "<i", (), c_int),
+ #(c_uint, "<I", (), c_uint),
- (c_long, "<l", None, c_long),
- (c_ulong, "<L", None, c_ulong),
+ (c_long, "<l", (), c_long),
+ (c_ulong, "<L", (), c_ulong),
# c_longlong and c_ulonglong are aliases on 64-bit platforms
#(c_longlong, "<q", None, c_longlong),
#(c_ulonglong, "<Q", None, c_ulonglong),
- (c_float, "<f", None, c_float),
- (c_double, "<d", None, c_double),
+ (c_float, "<f", (), c_float),
+ (c_double, "<d", (), c_double),
# c_longdouble may be an alias to c_double
- (c_bool, "<?", None, c_bool),
- (py_object, "<O", None, py_object),
+ (c_bool, "<?", (), c_bool),
+ (py_object, "<O", (), py_object),
## pointers
- (POINTER(c_byte), "&<b", None, POINTER(c_byte)),
- (POINTER(POINTER(c_long)), "&&<l", None, POINTER(POINTER(c_long))),
+ (POINTER(c_byte), "&<b", (), POINTER(c_byte)),
+ (POINTER(POINTER(c_long)), "&&<l", (), POINTER(POINTER(c_long))),
## arrays and pointers
@@ -145,32 +149,32 @@ native_types = [
(c_float * 4 * 3 * 2, "(2,3,4)<f", (2,3,4), c_float),
(POINTER(c_short) * 2, "(2)&<h", (2,), POINTER(c_short)),
(POINTER(c_short) * 2 * 3, "(3,2)&<h", (3,2,), POINTER(c_short)),
- (POINTER(c_short * 2), "&(2)<h", None, POINTER(c_short)),
+ (POINTER(c_short * 2), "&(2)<h", (), POINTER(c_short)),
## structures and unions
- (Point, "T{<l:x:<l:y:}", None, Point),
+ (Point, "T{<l:x:<l:y:}", (), Point),
# packed structures do not implement the pep
- (PackedPoint, "B", None, PackedPoint),
- (Point2, "T{<l:x:<l:y:}", None, Point2),
- (EmptyStruct, "T{}", None, EmptyStruct),
+ (PackedPoint, "B", (), PackedPoint),
+ (Point2, "T{<l:x:<l:y:}", (), Point2),
+ (EmptyStruct, "T{}", (), EmptyStruct),
# the pep does't support unions
- (aUnion, "B", None, aUnion),
+ (aUnion, "B", (), aUnion),
## pointer to incomplete structure
- (Incomplete, "B", None, Incomplete),
- (POINTER(Incomplete), "&B", None, POINTER(Incomplete)),
+ (Incomplete, "B", (), Incomplete),
+ (POINTER(Incomplete), "&B", (), POINTER(Incomplete)),
# 'Complete' is a structure that starts incomplete, but is completed after the
# pointer type to it has been created.
- (Complete, "T{<l:a:}", None, Complete),
+ (Complete, "T{<l:a:}", (), Complete),
# Unfortunately the pointer format string is not fixed...
- (POINTER(Complete), "&B", None, POINTER(Complete)),
+ (POINTER(Complete), "&B", (), POINTER(Complete)),
## other
# function signatures are not implemented
- (CFUNCTYPE(None), "X{}", None, CFUNCTYPE(None)),
+ (CFUNCTYPE(None), "X{}", (), CFUNCTYPE(None)),
]
@@ -186,10 +190,10 @@ class LEPoint(LittleEndianStructure):
# and little endian machines.
#
endian_types = [
- (BEPoint, "T{>l:x:>l:y:}", None, BEPoint),
- (LEPoint, "T{<l:x:<l:y:}", None, LEPoint),
- (POINTER(BEPoint), "&T{>l:x:>l:y:}", None, POINTER(BEPoint)),
- (POINTER(LEPoint), "&T{<l:x:<l:y:}", None, POINTER(LEPoint)),
+ (BEPoint, "T{>l:x:>l:y:}", (), BEPoint),
+ (LEPoint, "T{<l:x:<l:y:}", (), LEPoint),
+ (POINTER(BEPoint), "&T{>l:x:>l:y:}", (), POINTER(BEPoint)),
+ (POINTER(LEPoint), "&T{<l:x:<l:y:}", (), POINTER(LEPoint)),
]
if __name__ == "__main__":
diff --git a/Lib/ctypes/test/test_python_api.py b/Lib/ctypes/test/test_python_api.py
index 1f4c603..9de3980 100644
--- a/Lib/ctypes/test/test_python_api.py
+++ b/Lib/ctypes/test/test_python_api.py
@@ -1,5 +1,6 @@
from ctypes import *
import unittest, sys
+from test import support
from ctypes.test import is_resource_enabled
################################################################
@@ -25,6 +26,7 @@ class PythonAPITestCase(unittest.TestCase):
self.assertEqual(PyBytes_FromStringAndSize(b"abcdefghi", 3), b"abc")
+ @support.refcount_test
def test_PyString_FromString(self):
pythonapi.PyBytes_FromString.restype = py_object
pythonapi.PyBytes_FromString.argtypes = (c_char_p,)
@@ -56,6 +58,7 @@ class PythonAPITestCase(unittest.TestCase):
del res
self.assertEqual(grc(42), ref42)
+ @support.refcount_test
def test_PyObj_FromPtr(self):
s = "abc def ghi jkl"
ref = grc(s)
diff --git a/Lib/ctypes/test/test_refcounts.py b/Lib/ctypes/test/test_refcounts.py
index 35a81aa..5613e7a 100644
--- a/Lib/ctypes/test/test_refcounts.py
+++ b/Lib/ctypes/test/test_refcounts.py
@@ -1,4 +1,5 @@
import unittest
+from test import support
import ctypes
import gc
@@ -10,6 +11,7 @@ dll = ctypes.CDLL(_ctypes_test.__file__)
class RefcountTestCase(unittest.TestCase):
+ @support.refcount_test
def test_1(self):
from sys import getrefcount as grc
@@ -34,6 +36,7 @@ class RefcountTestCase(unittest.TestCase):
self.assertEqual(grc(callback), 2)
+ @support.refcount_test
def test_refcount(self):
from sys import getrefcount as grc
def func(*args):
diff --git a/Lib/ctypes/test/test_stringptr.py b/Lib/ctypes/test/test_stringptr.py
index 3d25fa5..95cd161 100644
--- a/Lib/ctypes/test/test_stringptr.py
+++ b/Lib/ctypes/test/test_stringptr.py
@@ -1,4 +1,5 @@
import unittest
+from test import support
from ctypes import *
import _ctypes_test
@@ -7,6 +8,7 @@ lib = CDLL(_ctypes_test.__file__)
class StringPtrTestCase(unittest.TestCase):
+ @support.refcount_test
def test__POINTER_c_char(self):
class X(Structure):
_fields_ = [("str", POINTER(c_char))]
diff --git a/Lib/ctypes/test/test_win32.py b/Lib/ctypes/test/test_win32.py
index 2534a74..128914e 100644
--- a/Lib/ctypes/test/test_win32.py
+++ b/Lib/ctypes/test/test_win32.py
@@ -67,6 +67,28 @@ if sys.platform == "win32":
self.assertEqual(ex.text, "text")
self.assertEqual(ex.details, ("details",))
+ class TestWinError(unittest.TestCase):
+ def test_winerror(self):
+ # see Issue 16169
+ import errno
+ ERROR_INVALID_PARAMETER = 87
+ msg = FormatError(ERROR_INVALID_PARAMETER).strip()
+ args = (errno.EINVAL, msg, None, ERROR_INVALID_PARAMETER)
+
+ e = WinError(ERROR_INVALID_PARAMETER)
+ self.assertEqual(e.args, args)
+ self.assertEqual(e.errno, errno.EINVAL)
+ self.assertEqual(e.winerror, ERROR_INVALID_PARAMETER)
+
+ windll.kernel32.SetLastError(ERROR_INVALID_PARAMETER)
+ try:
+ raise WinError()
+ except OSError as exc:
+ e = exc
+ self.assertEqual(e.args, args)
+ self.assertEqual(e.errno, errno.EINVAL)
+ self.assertEqual(e.winerror, ERROR_INVALID_PARAMETER)
+
class Structures(unittest.TestCase):
def test_struct_by_value(self):
diff --git a/Lib/ctypes/util.py b/Lib/ctypes/util.py
index 1bb7d1d..5555b2e 100644
--- a/Lib/ctypes/util.py
+++ b/Lib/ctypes/util.py
@@ -1,5 +1,6 @@
import sys, os
import contextlib
+import subprocess
# find_library(name) returns the pathname of a library, or None.
if os.name == "nt":
@@ -39,8 +40,8 @@ if os.name == "nt":
clibname = 'msvcr%d' % (version * 10)
# If python was built with in debug mode
- import imp
- if imp.get_suffixes()[0][0] == '_d.pyd':
+ import importlib.machinery
+ if '_d.pyd' in importlib.machinery.EXTENSION_SUFFIXES:
clibname += 'd'
return clibname+'.dll'
@@ -136,16 +137,12 @@ elif os.name == "posix":
rv = f.close()
if rv == 10:
raise OSError('objdump command not found')
- with contextlib.closing(os.popen(cmd)) as f:
- data = f.read()
- res = re.search(r'\sSONAME\s+([^\s]+)', data)
+ res = re.search(r'\sSONAME\s+([^\s]+)', dump)
if not res:
return None
return res.group(1)
- if (sys.platform.startswith("freebsd")
- or sys.platform.startswith("openbsd")
- or sys.platform.startswith("dragonfly")):
+ if sys.platform.startswith(("freebsd", "openbsd", "dragonfly")):
def _num_version(libname):
# "libxyz.so.MAJOR.MINOR" => [ MAJOR, MINOR ]
@@ -174,9 +171,9 @@ elif os.name == "posix":
def _findSoname_ldconfig(name):
import struct
if struct.calcsize('l') == 4:
- machine = os.uname()[4] + '-32'
+ machine = os.uname().machine + '-32'
else:
- machine = os.uname()[4] + '-64'
+ machine = os.uname().machine + '-64'
mach_map = {
'x86_64-64': 'libc6,x86-64',
'ppc64-64': 'libc6,64bit',
@@ -187,13 +184,19 @@ elif os.name == "posix":
abi_type = mach_map.get(machine, 'libc6')
# XXX assuming GLIBC's ldconfig (with option -p)
- expr = r'\s+(lib%s\.[^\s]+)\s+\(%s' % (re.escape(name), abi_type)
- with contextlib.closing(os.popen('LC_ALL=C LANG=C /sbin/ldconfig -p 2>/dev/null')) as f:
- data = f.read()
- res = re.search(expr, data)
- if not res:
- return None
- return res.group(1)
+ regex = os.fsencode(
+ '\s+(lib%s\.[^\s]+)\s+\(%s' % (re.escape(name), abi_type))
+ try:
+ with subprocess.Popen(['/sbin/ldconfig', '-p'],
+ stdin=subprocess.DEVNULL,
+ stderr=subprocess.DEVNULL,
+ stdout=subprocess.PIPE,
+ env={'LC_ALL': 'C', 'LANG': 'C'}) as p:
+ res = re.search(regex, p.stdout.read())
+ if res:
+ return os.fsdecode(res.group(1))
+ except OSError:
+ pass
def find_library(name):
return _findSoname_ldconfig(name) or _get_soname(_findLib_gcc(name))
diff --git a/Lib/curses/__init__.py b/Lib/curses/__init__.py
index a3e9def..7bfa667 100644
--- a/Lib/curses/__init__.py
+++ b/Lib/curses/__init__.py
@@ -11,7 +11,6 @@ the package, and perhaps a particular module inside it.
"""
from _curses import *
-from curses.wrapper import wrapper
import os as _os
import sys as _sys
@@ -55,3 +54,48 @@ try:
has_key
except NameError:
from .has_key import has_key
+
+# Wrapper for the entire curses-based application. Runs a function which
+# should be the rest of your curses-based application. If the application
+# raises an exception, wrapper() will restore the terminal to a sane state so
+# you can read the resulting traceback.
+
+def wrapper(func, *args, **kwds):
+ """Wrapper function that initializes curses and calls another function,
+ restoring normal keyboard/screen behavior on error.
+ The callable object 'func' is then passed the main window 'stdscr'
+ as its first argument, followed by any other arguments passed to
+ wrapper().
+ """
+
+ try:
+ # Initialize curses
+ stdscr = initscr()
+
+ # Turn off echoing of keys, and enter cbreak mode,
+ # where no buffering is performed on keyboard input
+ noecho()
+ cbreak()
+
+ # In keypad mode, escape sequences for special keys
+ # (like the cursor keys) will be interpreted and
+ # a special value like curses.KEY_LEFT will be returned
+ stdscr.keypad(1)
+
+ # Start color, too. Harmless if the terminal doesn't have
+ # color; user can test with has_color() later on. The try/catch
+ # works around a minor bit of over-conscientiousness in the curses
+ # module -- the error return from C start_color() is ignorable.
+ try:
+ start_color()
+ except:
+ pass
+
+ return func(stdscr, *args, **kwds)
+ finally:
+ # Set everything back to normal
+ if 'stdscr' in locals():
+ stdscr.keypad(0)
+ echo()
+ nocbreak()
+ endwin()
diff --git a/Lib/curses/wrapper.py b/Lib/curses/wrapper.py
deleted file mode 100644
index 5183ce7..0000000
--- a/Lib/curses/wrapper.py
+++ /dev/null
@@ -1,50 +0,0 @@
-"""curses.wrapper
-
-Contains one function, wrapper(), which runs another function which
-should be the rest of your curses-based application. If the
-application raises an exception, wrapper() will restore the terminal
-to a sane state so you can read the resulting traceback.
-
-"""
-
-import curses
-
-def wrapper(func, *args, **kwds):
- """Wrapper function that initializes curses and calls another function,
- restoring normal keyboard/screen behavior on error.
- The callable object 'func' is then passed the main window 'stdscr'
- as its first argument, followed by any other arguments passed to
- wrapper().
- """
-
- try:
- # Initialize curses
- stdscr = curses.initscr()
-
- # Turn off echoing of keys, and enter cbreak mode,
- # where no buffering is performed on keyboard input
- curses.noecho()
- curses.cbreak()
-
- # In keypad mode, escape sequences for special keys
- # (like the cursor keys) will be interpreted and
- # a special value like curses.KEY_LEFT will be returned
- stdscr.keypad(1)
-
- # Start color, too. Harmless if the terminal doesn't have
- # color; user can test with has_color() later on. The try/catch
- # works around a minor bit of over-conscientiousness in the curses
- # module -- the error return from C start_color() is ignorable.
- try:
- curses.start_color()
- except:
- pass
-
- return func(stdscr, *args, **kwds)
- finally:
- # Set everything back to normal
- if 'stdscr' in locals():
- stdscr.keypad(0)
- curses.echo()
- curses.nocbreak()
- curses.endwin()
diff --git a/Lib/datetime.py b/Lib/datetime.py
index bf23e50..f506e9a 100644
--- a/Lib/datetime.py
+++ b/Lib/datetime.py
@@ -172,10 +172,6 @@ def _format_time(hh, mm, ss, us):
# Correctly substitute for %z and %Z escapes in strftime formats.
def _wrap_strftime(object, format, timetuple):
- year = timetuple[0]
- if year < 1000:
- raise ValueError("year=%d is before 1000; the datetime strftime() "
- "methods require year >= 1000" % year)
# Don't call utcoffset() or tzname() unless actually needed.
freplace = None # the string to use for %f
zreplace = None # the string to use for %z
@@ -1069,13 +1065,13 @@ class time:
def __eq__(self, other):
if isinstance(other, time):
- return self._cmp(other) == 0
+ return self._cmp(other, allow_mixed=True) == 0
else:
return False
def __ne__(self, other):
if isinstance(other, time):
- return self._cmp(other) != 0
+ return self._cmp(other, allow_mixed=True) != 0
else:
return True
@@ -1103,7 +1099,7 @@ class time:
else:
_cmperror(self, other)
- def _cmp(self, other):
+ def _cmp(self, other, allow_mixed=False):
assert isinstance(other, time)
mytz = self._tzinfo
ottz = other._tzinfo
@@ -1122,7 +1118,10 @@ class time:
(other._hour, other._minute, other._second,
other._microsecond))
if myoff is None or otoff is None:
- raise TypeError("cannot compare naive and aware times")
+ if allow_mixed:
+ return 2 # arbitrary non-zero value
+ else:
+ raise TypeError("cannot compare naive and aware times")
myhhmm = self._hour * 60 + self._minute - myoff//timedelta(minutes=1)
othhmm = other._hour * 60 + other._minute - otoff//timedelta(minutes=1)
return _cmp((myhhmm, self._second, self._microsecond),
@@ -1364,7 +1363,7 @@ class datetime(date):
converter = _time.localtime if tz is None else _time.gmtime
t, frac = divmod(t, 1.0)
- us = round(frac * 1e6)
+ us = int(frac * 1e6)
# If timestamp is less than one microsecond smaller than a
# full second, us can be rounded up to 1000000. In this case,
@@ -1384,7 +1383,7 @@ class datetime(date):
def utcfromtimestamp(cls, t):
"Construct a UTC datetime from a POSIX timestamp (like time.time())."
t, frac = divmod(t, 1.0)
- us = round(frac * 1e6)
+ us = int(frac * 1e6)
# If timestamp is less than one microsecond smaller than a
# full second, us can be rounded up to 1000000. In this case,
@@ -1438,6 +1437,15 @@ class datetime(date):
self.hour, self.minute, self.second,
dst)
+ def timestamp(self):
+ "Return POSIX timestamp as float"
+ if self._tzinfo is None:
+ return _time.mktime((self.year, self.month, self.day,
+ self.hour, self.minute, self.second,
+ -1, -1, -1)) + self.microsecond / 1e6
+ else:
+ return (self - _EPOCH).total_seconds()
+
def utctimetuple(self):
"Return UTC time tuple compatible with time.gmtime()."
offset = self.utcoffset()
@@ -1485,8 +1493,32 @@ class datetime(date):
return datetime(year, month, day, hour, minute, second,
microsecond, tzinfo)
- def astimezone(self, tz):
- if not isinstance(tz, tzinfo):
+ def astimezone(self, tz=None):
+ if tz is None:
+ if self.tzinfo is None:
+ raise ValueError("astimezone() requires an aware datetime")
+ ts = (self - _EPOCH) // timedelta(seconds=1)
+ localtm = _time.localtime(ts)
+ local = datetime(*localtm[:6])
+ try:
+ # Extract TZ data if available
+ gmtoff = localtm.tm_gmtoff
+ zone = localtm.tm_zone
+ except AttributeError:
+ # Compute UTC offset and compare with the value implied
+ # by tm_isdst. If the values match, use the zone name
+ # implied by tm_isdst.
+ delta = local - datetime(*_time.gmtime(ts)[:6])
+ dst = _time.daylight and localtm.tm_isdst > 0
+ gmtoff = -(_time.altzone if dst else _time.timezone)
+ if delta == timedelta(seconds=gmtoff):
+ tz = timezone(delta, _time.tzname[dst])
+ else:
+ tz = timezone(delta)
+ else:
+ tz = timezone(timedelta(seconds=gmtoff), zone)
+
+ elif not isinstance(tz, tzinfo):
raise TypeError("tz argument must be an instance of tzinfo")
mytz = self.tzinfo
@@ -1610,7 +1642,7 @@ class datetime(date):
def __eq__(self, other):
if isinstance(other, datetime):
- return self._cmp(other) == 0
+ return self._cmp(other, allow_mixed=True) == 0
elif not isinstance(other, date):
return NotImplemented
else:
@@ -1618,7 +1650,7 @@ class datetime(date):
def __ne__(self, other):
if isinstance(other, datetime):
- return self._cmp(other) != 0
+ return self._cmp(other, allow_mixed=True) != 0
elif not isinstance(other, date):
return NotImplemented
else:
@@ -1656,7 +1688,7 @@ class datetime(date):
else:
_cmperror(self, other)
- def _cmp(self, other):
+ def _cmp(self, other, allow_mixed=False):
assert isinstance(other, datetime)
mytz = self._tzinfo
ottz = other._tzinfo
@@ -1665,10 +1697,8 @@ class datetime(date):
if mytz is ottz:
base_compare = True
else:
- if mytz is not None:
- myoff = self.utcoffset()
- if ottz is not None:
- otoff = other.utcoffset()
+ myoff = self.utcoffset()
+ otoff = other.utcoffset()
base_compare = myoff == otoff
if base_compare:
@@ -1679,7 +1709,10 @@ class datetime(date):
other._hour, other._minute, other._second,
other._microsecond))
if myoff is None or otoff is None:
- raise TypeError("cannot compare naive and aware datetimes")
+ if allow_mixed:
+ return 2 # arbitrary non-zero value
+ else:
+ raise TypeError("cannot compare naive and aware datetimes")
# XXX What follows could be done more efficiently...
diff = self - other # this will take offsets into account
if diff.days < 0:
@@ -1895,7 +1928,7 @@ class timezone(tzinfo):
timezone.utc = timezone._create(timedelta(0))
timezone.min = timezone._create(timezone._minoffset)
timezone.max = timezone._create(timezone._maxoffset)
-
+_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
"""
Some time zone algebra. For a datetime x, let
x.n = x stripped of its timezone -- its naive time.
diff --git a/Lib/decimal.py b/Lib/decimal.py
index 49de535..b74ab01 100644
--- a/Lib/decimal.py
+++ b/Lib/decimal.py
@@ -46,8 +46,8 @@ Decimal('1')
Decimal('-0.0123')
>>> Decimal(123456)
Decimal('123456')
->>> Decimal('123.45e12345678901234567890')
-Decimal('1.2345E+12345678901234567892')
+>>> Decimal('123.45e12345678')
+Decimal('1.2345E+12345680')
>>> Decimal('1.33') + Decimal('1.27')
Decimal('2.60')
>>> Decimal('12.34') + Decimal('3.87') - Decimal('18.41')
@@ -122,13 +122,20 @@ __all__ = [
# Exceptions
'DecimalException', 'Clamped', 'InvalidOperation', 'DivisionByZero',
'Inexact', 'Rounded', 'Subnormal', 'Overflow', 'Underflow',
+ 'FloatOperation',
# Constants for use in setting up contexts
'ROUND_DOWN', 'ROUND_HALF_UP', 'ROUND_HALF_EVEN', 'ROUND_CEILING',
'ROUND_FLOOR', 'ROUND_UP', 'ROUND_HALF_DOWN', 'ROUND_05UP',
# Functions for manipulating contexts
- 'setcontext', 'getcontext', 'localcontext'
+ 'setcontext', 'getcontext', 'localcontext',
+
+ # Limits for the C version for compatibility
+ 'MAX_PREC', 'MAX_EMAX', 'MIN_EMIN', 'MIN_ETINY',
+
+ # C version: compile time choice that enables the thread local context
+ 'HAVE_THREADS'
]
__version__ = '1.70' # Highest version of the spec this complies with
@@ -137,6 +144,7 @@ __version__ = '1.70' # Highest version of the spec this complies with
import copy as _copy
import math as _math
import numbers as _numbers
+import sys
try:
from collections import namedtuple as _namedtuple
@@ -154,6 +162,19 @@ ROUND_UP = 'ROUND_UP'
ROUND_HALF_DOWN = 'ROUND_HALF_DOWN'
ROUND_05UP = 'ROUND_05UP'
+# Compatibility with the C version
+HAVE_THREADS = True
+if sys.maxsize == 2**63-1:
+ MAX_PREC = 999999999999999999
+ MAX_EMAX = 999999999999999999
+ MIN_EMIN = -999999999999999999
+else:
+ MAX_PREC = 425000000
+ MAX_EMAX = 425000000
+ MIN_EMIN = -425000000
+
+MIN_ETINY = MIN_EMIN - (MAX_PREC-1)
+
# Errors
class DecimalException(ArithmeticError):
@@ -370,9 +391,24 @@ class Underflow(Inexact, Rounded, Subnormal):
In all cases, Inexact, Rounded, and Subnormal will also be raised.
"""
+class FloatOperation(DecimalException, TypeError):
+ """Enable stricter semantics for mixing floats and Decimals.
+
+ If the signal is not trapped (default), mixing floats and Decimals is
+ permitted in the Decimal() constructor, context.create_decimal() and
+ all comparison operators. Both conversion and comparisons are exact.
+ Any occurrence of a mixed operation is silently recorded by setting
+ FloatOperation in the context flags. Explicit conversions with
+ Decimal.from_float() or context.create_decimal_from_float() do not
+ set the flag.
+
+ Otherwise (the signal is trapped), only equality comparisons and explicit
+ conversions are silent. All other mixed operations raise FloatOperation.
+ """
+
# List of public traps and flags
_signals = [Clamped, DivisionByZero, Inexact, Overflow, Rounded,
- Underflow, InvalidOperation, Subnormal]
+ Underflow, InvalidOperation, Subnormal, FloatOperation]
# Map conditions (per the spec) to signals
_condition_map = {ConversionSyntax:InvalidOperation,
@@ -380,6 +416,10 @@ _condition_map = {ConversionSyntax:InvalidOperation,
DivisionUndefined:InvalidOperation,
InvalidContext:InvalidOperation}
+# Valid rounding modes
+_rounding_modes = (ROUND_DOWN, ROUND_HALF_UP, ROUND_HALF_EVEN, ROUND_CEILING,
+ ROUND_FLOOR, ROUND_UP, ROUND_HALF_DOWN, ROUND_05UP)
+
##### Context Functions ##################################################
# The getcontext() and setcontext() function manage access to a thread-local
@@ -392,12 +432,11 @@ try:
import threading
except ImportError:
# Python was compiled without threads; create a mock object instead
- import sys
class MockThreading(object):
def local(self, sys=sys):
return sys.modules[__name__]
threading = MockThreading()
- del sys, MockThreading
+ del MockThreading
try:
threading.local
@@ -650,6 +689,11 @@ class Decimal(object):
return self
if isinstance(value, float):
+ if context is None:
+ context = getcontext()
+ context._raise_error(FloatOperation,
+ "strict semantics for mixing floats and Decimals are "
+ "enabled")
value = Decimal.from_float(value)
self._exp = value._exp
self._sign = value._sign
@@ -684,7 +728,9 @@ class Decimal(object):
"""
if isinstance(f, int): # handle integer inputs
return cls(f)
- if _math.isinf(f) or _math.isnan(f): # raises TypeError if not a float
+ if not isinstance(f, float):
+ raise TypeError("argument must be int or float.")
+ if _math.isinf(f) or _math.isnan(f):
return cls(repr(f))
if _math.copysign(1.0, f) == 1.0:
sign = 0
@@ -1877,6 +1923,7 @@ class Decimal(object):
"""
other = _convert_other(other, raiseit=True)
+ third = _convert_other(third, raiseit=True)
# compute product; raise InvalidOperation if either operand is
# a signaling NaN or if the product is zero times infinity.
@@ -1906,17 +1953,17 @@ class Decimal(object):
str(int(self._int) * int(other._int)),
self._exp + other._exp)
- third = _convert_other(third, raiseit=True)
return product.__add__(third, context)
def _power_modulo(self, other, modulo, context=None):
"""Three argument version of __pow__"""
- # if can't convert other and modulo to Decimal, raise
- # TypeError; there's no point returning NotImplemented (no
- # equivalent of __rpow__ for three argument pow)
- other = _convert_other(other, raiseit=True)
- modulo = _convert_other(modulo, raiseit=True)
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+ modulo = _convert_other(modulo)
+ if modulo is NotImplemented:
+ return modulo
if context is None:
context = getcontext()
@@ -2007,9 +2054,9 @@ class Decimal(object):
nonzero. For efficiency, other._exp should not be too large,
so that 10**abs(other._exp) is a feasible calculation."""
- # In the comments below, we write x for the value of self and
- # y for the value of other. Write x = xc*10**xe and y =
- # yc*10**ye.
+ # In the comments below, we write x for the value of self and y for the
+ # value of other. Write x = xc*10**xe and abs(y) = yc*10**ye, with xc
+ # and yc positive integers not divisible by 10.
# The main purpose of this method is to identify the *failure*
# of x**y to be exactly representable with as little effort as
@@ -2017,13 +2064,12 @@ class Decimal(object):
# eliminate the possibility of x**y being exact. Only if all
# these tests are passed do we go on to actually compute x**y.
- # Here's the main idea. First normalize both x and y. We
- # express y as a rational m/n, with m and n relatively prime
- # and n>0. Then for x**y to be exactly representable (at
- # *any* precision), xc must be the nth power of a positive
- # integer and xe must be divisible by n. If m is negative
- # then additionally xc must be a power of either 2 or 5, hence
- # a power of 2**n or 5**n.
+ # Here's the main idea. Express y as a rational number m/n, with m and
+ # n relatively prime and n>0. Then for x**y to be exactly
+ # representable (at *any* precision), xc must be the nth power of a
+ # positive integer and xe must be divisible by n. If y is negative
+ # then additionally xc must be a power of either 2 or 5, hence a power
+ # of 2**n or 5**n.
#
# There's a limit to how small |y| can be: if y=m/n as above
# then:
@@ -2095,21 +2141,43 @@ class Decimal(object):
return None
# now xc is a power of 2; e is its exponent
e = _nbits(xc)-1
- # find e*y and xe*y; both must be integers
- if ye >= 0:
- y_as_int = yc*10**ye
- e = e*y_as_int
- xe = xe*y_as_int
- else:
- ten_pow = 10**-ye
- e, remainder = divmod(e*yc, ten_pow)
- if remainder:
- return None
- xe, remainder = divmod(xe*yc, ten_pow)
- if remainder:
- return None
-
- if e*65 >= p*93: # 93/65 > log(10)/log(5)
+
+ # We now have:
+ #
+ # x = 2**e * 10**xe, e > 0, and y < 0.
+ #
+ # The exact result is:
+ #
+ # x**y = 5**(-e*y) * 10**(e*y + xe*y)
+ #
+ # provided that both e*y and xe*y are integers. Note that if
+ # 5**(-e*y) >= 10**p, then the result can't be expressed
+ # exactly with p digits of precision.
+ #
+ # Using the above, we can guard against large values of ye.
+ # 93/65 is an upper bound for log(10)/log(5), so if
+ #
+ # ye >= len(str(93*p//65))
+ #
+ # then
+ #
+ # -e*y >= -y >= 10**ye > 93*p/65 > p*log(10)/log(5),
+ #
+ # so 5**(-e*y) >= 10**p, and the coefficient of the result
+ # can't be expressed in p digits.
+
+ # emax >= largest e such that 5**e < 10**p.
+ emax = p*93//65
+ if ye >= len(str(emax)):
+ return None
+
+ # Find -e*y and -xe*y; both must be integers
+ e = _decimal_lshift_exact(e * yc, ye)
+ xe = _decimal_lshift_exact(xe * yc, ye)
+ if e is None or xe is None:
+ return None
+
+ if e > emax:
return None
xc = 5**e
@@ -2123,19 +2191,20 @@ class Decimal(object):
while xc % 5 == 0:
xc //= 5
e -= 1
- if ye >= 0:
- y_as_integer = yc*10**ye
- e = e*y_as_integer
- xe = xe*y_as_integer
- else:
- ten_pow = 10**-ye
- e, remainder = divmod(e*yc, ten_pow)
- if remainder:
- return None
- xe, remainder = divmod(xe*yc, ten_pow)
- if remainder:
- return None
- if e*3 >= p*10: # 10/3 > log(10)/log(2)
+
+ # Guard against large values of ye, using the same logic as in
+ # the 'xc is a power of 2' branch. 10/3 is an upper bound for
+ # log(10)/log(2).
+ emax = p*10//3
+ if ye >= len(str(emax)):
+ return None
+
+ e = _decimal_lshift_exact(e * yc, ye)
+ xe = _decimal_lshift_exact(xe * yc, ye)
+ if e is None or xe is None:
+ return None
+
+ if e > emax:
return None
xc = 2**e
else:
@@ -3816,11 +3885,9 @@ class Context(object):
clamp - If 1, change exponents if too high (Default 0)
"""
- def __init__(self, prec=None, rounding=None,
- traps=None, flags=None,
- Emin=None, Emax=None,
- capitals=None, clamp=None,
- _ignored_flags=None):
+ def __init__(self, prec=None, rounding=None, Emin=None, Emax=None,
+ capitals=None, clamp=None, flags=None, traps=None,
+ _ignored_flags=None):
# Set defaults; for everything except flags and _ignored_flags,
# inherit from DefaultContext.
try:
@@ -3843,17 +3910,78 @@ class Context(object):
if traps is None:
self.traps = dc.traps.copy()
elif not isinstance(traps, dict):
- self.traps = dict((s, int(s in traps)) for s in _signals)
+ self.traps = dict((s, int(s in traps)) for s in _signals + traps)
else:
self.traps = traps
if flags is None:
self.flags = dict.fromkeys(_signals, 0)
elif not isinstance(flags, dict):
- self.flags = dict((s, int(s in flags)) for s in _signals)
+ self.flags = dict((s, int(s in flags)) for s in _signals + flags)
else:
self.flags = flags
+ def _set_integer_check(self, name, value, vmin, vmax):
+ if not isinstance(value, int):
+ raise TypeError("%s must be an integer" % name)
+ if vmin == '-inf':
+ if value > vmax:
+ raise ValueError("%s must be in [%s, %d]. got: %s" % (name, vmin, vmax, value))
+ elif vmax == 'inf':
+ if value < vmin:
+ raise ValueError("%s must be in [%d, %s]. got: %s" % (name, vmin, vmax, value))
+ else:
+ if value < vmin or value > vmax:
+ raise ValueError("%s must be in [%d, %d]. got %s" % (name, vmin, vmax, value))
+ return object.__setattr__(self, name, value)
+
+ def _set_signal_dict(self, name, d):
+ if not isinstance(d, dict):
+ raise TypeError("%s must be a signal dict" % d)
+ for key in d:
+ if not key in _signals:
+ raise KeyError("%s is not a valid signal dict" % d)
+ for key in _signals:
+ if not key in d:
+ raise KeyError("%s is not a valid signal dict" % d)
+ return object.__setattr__(self, name, d)
+
+ def __setattr__(self, name, value):
+ if name == 'prec':
+ return self._set_integer_check(name, value, 1, 'inf')
+ elif name == 'Emin':
+ return self._set_integer_check(name, value, '-inf', 0)
+ elif name == 'Emax':
+ return self._set_integer_check(name, value, 0, 'inf')
+ elif name == 'capitals':
+ return self._set_integer_check(name, value, 0, 1)
+ elif name == 'clamp':
+ return self._set_integer_check(name, value, 0, 1)
+ elif name == 'rounding':
+ if not value in _rounding_modes:
+ # raise TypeError even for strings to have consistency
+ # among various implementations.
+ raise TypeError("%s: invalid rounding mode" % value)
+ return object.__setattr__(self, name, value)
+ elif name == 'flags' or name == 'traps':
+ return self._set_signal_dict(name, value)
+ elif name == '_ignored_flags':
+ return object.__setattr__(self, name, value)
+ else:
+ raise AttributeError(
+ "'decimal.Context' object has no attribute '%s'" % name)
+
+ def __delattr__(self, name):
+ raise AttributeError("%s cannot be deleted" % name)
+
+ # Support for pickling, copy, and deepcopy
+ def __reduce__(self):
+ flags = [sig for sig, v in self.flags.items() if v]
+ traps = [sig for sig, v in self.traps.items() if v]
+ return (self.__class__,
+ (self.prec, self.rounding, self.Emin, self.Emax,
+ self.capitals, self.clamp, flags, traps))
+
def __repr__(self):
"""Show the current context."""
s = []
@@ -3872,43 +4000,27 @@ class Context(object):
for flag in self.flags:
self.flags[flag] = 0
+ def clear_traps(self):
+ """Reset all traps to zero"""
+ for flag in self.traps:
+ self.traps[flag] = 0
+
def _shallow_copy(self):
"""Returns a shallow copy from self."""
- nc = Context(self.prec, self.rounding, self.traps,
- self.flags, self.Emin, self.Emax,
- self.capitals, self.clamp, self._ignored_flags)
+ nc = Context(self.prec, self.rounding, self.Emin, self.Emax,
+ self.capitals, self.clamp, self.flags, self.traps,
+ self._ignored_flags)
return nc
def copy(self):
"""Returns a deep copy from self."""
- nc = Context(self.prec, self.rounding, self.traps.copy(),
- self.flags.copy(), self.Emin, self.Emax,
- self.capitals, self.clamp, self._ignored_flags)
+ nc = Context(self.prec, self.rounding, self.Emin, self.Emax,
+ self.capitals, self.clamp,
+ self.flags.copy(), self.traps.copy(),
+ self._ignored_flags)
return nc
__copy__ = copy
- # _clamp is provided for backwards compatibility with third-party
- # code. May be removed in Python >= 3.3.
- def _get_clamp(self):
- "_clamp mirrors the clamp attribute. Its use is deprecated."
- import warnings
- warnings.warn('Use of the _clamp attribute is deprecated. '
- 'Please use clamp instead.',
- DeprecationWarning)
- return self.clamp
-
- def _set_clamp(self, clamp):
- "_clamp mirrors the clamp attribute. Its use is deprecated."
- import warnings
- warnings.warn('Use of the _clamp attribute is deprecated. '
- 'Please use clamp instead.',
- DeprecationWarning)
- self.clamp = clamp
-
- # don't bother with _del_clamp; no sane 3rd party code should
- # be deleting the _clamp attribute
- _clamp = property(_get_clamp, _set_clamp)
-
def _raise_error(self, condition, explanation = None, *args):
"""Handles an error
@@ -4068,6 +4180,8 @@ class Context(object):
>>> ExtendedContext.canonical(Decimal('2.50'))
Decimal('2.50')
"""
+ if not isinstance(a, Decimal):
+ raise TypeError("canonical requires a Decimal as an argument.")
return a.canonical(context=self)
def compare(self, a, b):
@@ -4378,6 +4492,8 @@ class Context(object):
>>> ExtendedContext.is_canonical(Decimal('2.50'))
True
"""
+ if not isinstance(a, Decimal):
+ raise TypeError("is_canonical requires a Decimal as an argument.")
return a.is_canonical()
def is_finite(self, a):
@@ -4970,7 +5086,7 @@ class Context(object):
+Normal
+Infinity
- >>> c = Context(ExtendedContext)
+ >>> c = ExtendedContext.copy()
>>> c.Emin = -999
>>> c.Emax = 999
>>> c.number_class(Decimal('Infinity'))
@@ -5535,6 +5651,27 @@ def _normalize(op1, op2, prec = 0):
_nbits = int.bit_length
+def _decimal_lshift_exact(n, e):
+ """ Given integers n and e, return n * 10**e if it's an integer, else None.
+
+ The computation is designed to avoid computing large powers of 10
+ unnecessarily.
+
+ >>> _decimal_lshift_exact(3, 4)
+ 30000
+ >>> _decimal_lshift_exact(300, -999999999) # returns None
+
+ """
+ if n == 0:
+ return 0
+ elif e >= 0:
+ return n * 10**e
+ else:
+ # val_n = largest power of 10 dividing n.
+ str_n = str(abs(n))
+ val_n = len(str_n) - len(str_n.rstrip('0'))
+ return None if val_n < -e else n // 10**-e
+
def _sqrt_nearest(n, a):
"""Closest integer to the square root of the positive integer n. a is
an initial approximation to the square root. Any positive integer
@@ -5901,6 +6038,12 @@ def _convert_for_comparison(self, other, equality_op=False):
if equality_op and isinstance(other, _numbers.Complex) and other.imag == 0:
other = other.real
if isinstance(other, float):
+ context = getcontext()
+ if equality_op:
+ context.flags[FloatOperation] = 1
+ else:
+ context._raise_error(FloatOperation,
+ "strict semantics for mixing floats and Decimals are enabled")
return self, Decimal.from_float(other)
return NotImplemented, NotImplemented
@@ -5914,8 +6057,8 @@ DefaultContext = Context(
prec=28, rounding=ROUND_HALF_EVEN,
traps=[DivisionByZero, Overflow, InvalidOperation],
flags=[],
- Emax=999999999,
- Emin=-999999999,
+ Emax=999999,
+ Emin=-999999,
capitals=1,
clamp=0
)
@@ -6065,7 +6208,7 @@ def _parse_format_specifier(format_spec, _localeconv=None):
# if format type is 'g' or 'G' then a precision of 0 makes little
# sense; convert it to 1. Same if format type is unspecified.
if format_dict['precision'] == 0:
- if format_dict['type'] is None or format_dict['type'] in 'gG':
+ if format_dict['type'] is None or format_dict['type'] in 'gGn':
format_dict['precision'] = 1
# determine thousands separator, grouping, and decimal separator, and
@@ -6239,16 +6382,26 @@ _SignedInfinity = (_Infinity, _NegativeInfinity)
# Constants related to the hash implementation; hash(x) is based
# on the reduction of x modulo _PyHASH_MODULUS
-import sys
_PyHASH_MODULUS = sys.hash_info.modulus
# hash values to use for positive and negative infinities, and nans
_PyHASH_INF = sys.hash_info.inf
_PyHASH_NAN = sys.hash_info.nan
-del sys
# _PyHASH_10INV is the inverse of 10 modulo the prime _PyHASH_MODULUS
_PyHASH_10INV = pow(10, _PyHASH_MODULUS - 2, _PyHASH_MODULUS)
+del sys
+try:
+ import _decimal
+except ImportError:
+ pass
+else:
+ s1 = set(dir())
+ s2 = set(dir(_decimal))
+ for name in s1 - s2:
+ del globals()[name]
+ del s1, s2, name
+ from _decimal import *
if __name__ == '__main__':
import doctest, decimal
diff --git a/Lib/difflib.py b/Lib/difflib.py
index e6cc6ee..ae377d7 100644
--- a/Lib/difflib.py
+++ b/Lib/difflib.py
@@ -204,7 +204,7 @@ class SequenceMatcher:
# returning true iff the element is "junk" -- this has
# subtle but helpful effects on the algorithm, which I'll
# get around to writing up someday <0.9 wink>.
- # DON'T USE! Only __chain_b uses this. Use isbjunk.
+ # DON'T USE! Only __chain_b uses this. Use "in self.bjunk".
# bjunk
# the items in b for which isjunk is True.
# bpopular
@@ -287,7 +287,6 @@ class SequenceMatcher:
# when self.isjunk is defined, junk elements don't show up in this
# map at all, which stops the central find_longest_match method
# from starting any matching block at a junk element ...
- # also creates the fast isbjunk function ...
# b2j also does not contain entries for "popular" elements, meaning
# elements that account for more than 1 + 1% of the total elements, and
# when the sequence is reasonably large (>= 200 elements); this can
@@ -800,7 +799,7 @@ class Differ:
... 2. Explicit is better than implicit.
... 3. Simple is better than complex.
... 4. Complex is better than complicated.
- ... '''.splitlines(1)
+ ... '''.splitlines(keepends=True)
>>> len(text1)
4
>>> text1[0][-1]
@@ -809,7 +808,7 @@ class Differ:
... 3. Simple is better than complex.
... 4. Complicated is better than complex.
... 5. Flat is better than nested.
- ... '''.splitlines(1)
+ ... '''.splitlines(keepends=True)
Next we instantiate a Differ object:
@@ -896,8 +895,8 @@ class Differ:
Example:
- >>> print(''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(1),
- ... 'ore\ntree\nemu\n'.splitlines(1))),
+ >>> print(''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(True),
+ ... 'ore\ntree\nemu\n'.splitlines(True))),
... end="")
- one
? ^
@@ -1269,8 +1268,8 @@ def context_diff(a, b, fromfile='', tofile='',
Example:
- >>> print(''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1),
- ... 'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current')),
+ >>> print(''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(True),
+ ... 'zero\none\ntree\nfour\n'.splitlines(True), 'Original', 'Current')),
... end="")
*** Original
--- Current
@@ -1339,8 +1338,8 @@ def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
Example:
- >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
- ... 'ore\ntree\nemu\n'.splitlines(1))
+ >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(keepends=True),
+ ... 'ore\ntree\nemu\n'.splitlines(keepends=True))
>>> print(''.join(diff), end="")
- one
? ^
@@ -2034,8 +2033,8 @@ def restore(delta, which):
Examples:
- >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
- ... 'ore\ntree\nemu\n'.splitlines(1))
+ >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(keepends=True),
+ ... 'ore\ntree\nemu\n'.splitlines(keepends=True))
>>> diff = list(diff)
>>> print(''.join(restore(diff, 1)), end="")
one
diff --git a/Lib/dis.py b/Lib/dis.py
index f64bae6..543fdc7 100644
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -190,6 +190,9 @@ def disassemble(co, lasti=-1):
if free is None:
free = co.co_cellvars + co.co_freevars
print('(' + free[oparg] + ')', end=' ')
+ elif op in hasnargs:
+ print('(%d positional, %d keyword pair)'
+ % (code[i-2], code[i-1]), end=' ')
print()
def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
@@ -229,6 +232,9 @@ def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
print('(%d)' % oparg, end=' ')
elif op in hascompare:
print('(' + cmp_op[oparg] + ')', end=' ')
+ elif op in hasnargs:
+ print('(%d positional, %d keyword pair)'
+ % (code[i-2], code[i-1]), end=' ')
print()
def _disassemble_str(source):
diff --git a/Lib/distutils/__init__.py b/Lib/distutils/__init__.py
index b52a9fe..345ac4f 100644
--- a/Lib/distutils/__init__.py
+++ b/Lib/distutils/__init__.py
@@ -13,5 +13,5 @@ used from a setup script as
# Updated automatically by the Python release process.
#
#--start constants--
-__version__ = "3.2.3"
+__version__ = "3.3.0"
#--end constants--
diff --git a/Lib/distutils/command/bdist_wininst.py b/Lib/distutils/command/bdist_wininst.py
index e3ed3ad..959a8bf 100644
--- a/Lib/distutils/command/bdist_wininst.py
+++ b/Lib/distutils/command/bdist_wininst.py
@@ -265,11 +265,11 @@ class bdist_wininst(Command):
cfgdata = cfgdata + b"\0"
if self.pre_install_script:
# We need to normalize newlines, so we open in text mode and
- # convert back to bytes. "latin1" simply avoids any possible
+ # convert back to bytes. "latin-1" simply avoids any possible
# failures.
with open(self.pre_install_script, "r",
- encoding="latin1") as script:
- script_data = script.read().encode("latin1")
+ encoding="latin-1") as script:
+ script_data = script.read().encode("latin-1")
cfgdata = cfgdata + script_data + b"\n\0"
else:
# empty pre-install script
diff --git a/Lib/distutils/command/build_ext.py b/Lib/distutils/command/build_ext.py
index 34b61bd..f16e2f1 100644
--- a/Lib/distutils/command/build_ext.py
+++ b/Lib/distutils/command/build_ext.py
@@ -8,6 +8,7 @@ import sys, os, re
from distutils.core import Command
from distutils.errors import *
from distutils.sysconfig import customize_compiler, get_python_version
+from distutils.sysconfig import get_config_h_filename
from distutils.dep_util import newer_group
from distutils.extension import Extension
from distutils.util import get_platform
@@ -196,8 +197,11 @@ class build_ext(Command):
# Append the source distribution include and library directories,
# this allows distutils on windows to work in the source tree
- self.include_dirs.append(os.path.join(sys.exec_prefix, 'PC'))
- if MSVC_VERSION == 9:
+ self.include_dirs.append(os.path.dirname(get_config_h_filename()))
+ _sys_home = getattr(sys, '_home', None)
+ if _sys_home:
+ self.library_dirs.append(_sys_home)
+ if MSVC_VERSION >= 9:
# Use the .lib files for the correct architecture
if self.plat_name == 'win32':
suffix = ''
@@ -239,8 +243,7 @@ class build_ext(Command):
# for extensions under Linux or Solaris with a shared Python library,
# Python's library directory must be appended to library_dirs
sysconfig.get_config_var('Py_ENABLE_SHARED')
- if ((sys.platform.startswith('linux') or sys.platform.startswith('gnu')
- or sys.platform.startswith('sunos'))
+ if (sys.platform.startswith(('linux', 'gnu', 'sunos'))
and sysconfig.get_config_var('Py_ENABLE_SHARED')):
if sys.executable.startswith(os.path.join(sys.exec_prefix, "bin")):
# building third party extensions
diff --git a/Lib/distutils/command/build_scripts.py b/Lib/distutils/command/build_scripts.py
index ec43477..4b5b22e 100644
--- a/Lib/distutils/command/build_scripts.py
+++ b/Lib/distutils/command/build_scripts.py
@@ -126,10 +126,9 @@ class build_scripts(Command):
"The shebang ({!r}) is not decodable "
"from the script encoding ({})"
.format(shebang, encoding))
- outf = open(outfile, "wb")
- outf.write(shebang)
- outf.writelines(f.readlines())
- outf.close()
+ with open(outfile, "wb") as outf:
+ outf.write(shebang)
+ outf.writelines(f.readlines())
if f:
f.close()
else:
diff --git a/Lib/distutils/command/wininst-10.0-amd64.exe b/Lib/distutils/command/wininst-10.0-amd64.exe
new file mode 100644
index 0000000..6fa0dce
--- /dev/null
+++ b/Lib/distutils/command/wininst-10.0-amd64.exe
Binary files differ
diff --git a/Lib/distutils/command/wininst-10.0.exe b/Lib/distutils/command/wininst-10.0.exe
new file mode 100644
index 0000000..afc3bc6
--- /dev/null
+++ b/Lib/distutils/command/wininst-10.0.exe
Binary files differ
diff --git a/Lib/distutils/cygwinccompiler.py b/Lib/distutils/cygwinccompiler.py
index 819e1a9..0bdd539 100644
--- a/Lib/distutils/cygwinccompiler.py
+++ b/Lib/distutils/cygwinccompiler.py
@@ -78,6 +78,9 @@ def get_msvcr():
elif msc_ver == '1500':
# VS2008 / MSVC 9.0
return ['msvcr90']
+ elif msc_ver == '1600':
+ # VS2010 / MSVC 10.0
+ return ['msvcr100']
else:
raise ValueError("Unknown MS Compiler version %s " % msc_ver)
diff --git a/Lib/distutils/sysconfig.py b/Lib/distutils/sysconfig.py
index 16902ca..317640c 100644
--- a/Lib/distutils/sysconfig.py
+++ b/Lib/distutils/sysconfig.py
@@ -18,6 +18,8 @@ from .errors import DistutilsPlatformError
# These are needed in a couple of spots, so just compute them once.
PREFIX = os.path.normpath(sys.prefix)
EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
+BASE_PREFIX = os.path.normpath(sys.base_prefix)
+BASE_EXEC_PREFIX = os.path.normpath(sys.base_exec_prefix)
# Path to the base directory of the project. On Windows the binary may
# live in project/PCBuild9. If we're dealing with an x64 Windows build,
@@ -39,11 +41,21 @@ if os.name == "nt" and "\\pcbuild\\amd64" in project_base[-14:].lower():
# different (hard-wired) directories.
# Setup.local is available for Makefile builds including VPATH builds,
# Setup.dist is available on Windows
-def _python_build():
+def _is_python_source_dir(d):
for fn in ("Setup.dist", "Setup.local"):
- if os.path.isfile(os.path.join(project_base, "Modules", fn)):
+ if os.path.isfile(os.path.join(d, "Modules", fn)):
return True
return False
+_sys_home = getattr(sys, '_home', None)
+if _sys_home and os.name == 'nt' and \
+ _sys_home.lower().endswith(('pcbuild', 'pcbuild\\amd64')):
+ _sys_home = os.path.dirname(_sys_home)
+ if _sys_home.endswith('pcbuild'): # must be amd64
+ _sys_home = os.path.dirname(_sys_home)
+def _python_build():
+ if _sys_home:
+ return _is_python_source_dir(_sys_home)
+ return _is_python_source_dir(project_base)
python_build = _python_build()
# Calculate the build qualifier flags if they are defined. Adding the flags
@@ -74,11 +86,11 @@ def get_python_inc(plat_specific=0, prefix=None):
otherwise, this is the path to platform-specific header files
(namely pyconfig.h).
- If 'prefix' is supplied, use it instead of sys.prefix or
- sys.exec_prefix -- i.e., ignore 'plat_specific'.
+ If 'prefix' is supplied, use it instead of sys.base_prefix or
+ sys.base_exec_prefix -- i.e., ignore 'plat_specific'.
"""
if prefix is None:
- prefix = plat_specific and EXEC_PREFIX or PREFIX
+ prefix = plat_specific and BASE_EXEC_PREFIX or BASE_PREFIX
if os.name == "posix":
if python_build:
# Assume the executable is in the build directory. The
@@ -86,12 +98,14 @@ def get_python_inc(plat_specific=0, prefix=None):
# the build directory may not be the source directory, we
# must use "srcdir" from the makefile to find the "Include"
# directory.
- base = os.path.dirname(os.path.abspath(sys.executable))
+ base = _sys_home or os.path.dirname(os.path.abspath(sys.executable))
if plat_specific:
return base
+ if _sys_home:
+ incdir = os.path.join(_sys_home, get_config_var('AST_H_DIR'))
else:
incdir = os.path.join(get_config_var('srcdir'), 'Include')
- return os.path.normpath(incdir)
+ return os.path.normpath(incdir)
python_dir = 'python' + get_python_version() + build_flags
return os.path.join(prefix, "include", python_dir)
elif os.name == "nt":
@@ -115,11 +129,14 @@ def get_python_lib(plat_specific=0, standard_lib=0, prefix=None):
containing standard Python library modules; otherwise, return the
directory for site-specific modules.
- If 'prefix' is supplied, use it instead of sys.prefix or
- sys.exec_prefix -- i.e., ignore 'plat_specific'.
+ If 'prefix' is supplied, use it instead of sys.base_prefix or
+ sys.base_exec_prefix -- i.e., ignore 'plat_specific'.
"""
if prefix is None:
- prefix = plat_specific and EXEC_PREFIX or PREFIX
+ if standard_lib:
+ prefix = plat_specific and BASE_EXEC_PREFIX or BASE_PREFIX
+ else:
+ prefix = plat_specific and EXEC_PREFIX or PREFIX
if os.name == "posix":
libpython = os.path.join(prefix,
@@ -146,7 +163,7 @@ def get_python_lib(plat_specific=0, standard_lib=0, prefix=None):
"I don't know where Python installs its library "
"on platform '%s'" % os.name)
-_USE_CLANG = None
+
def customize_compiler(compiler):
"""Do any platform-specific customization of a CCompiler instance.
@@ -155,42 +172,28 @@ def customize_compiler(compiler):
varies across Unices and is stored in Python's Makefile.
"""
if compiler.compiler_type == "unix":
+ if sys.platform == "darwin":
+ # Perform first-time customization of compiler-related
+ # config vars on OS X now that we know we need a compiler.
+ # This is primarily to support Pythons from binary
+ # installers. The kind and paths to build tools on
+ # the user system may vary significantly from the system
+ # that Python itself was built on. Also the user OS
+ # version and build tools may not support the same set
+ # of CPU architectures for universal builds.
+ global _config_vars
+ if not _config_vars.get('CUSTOMIZED_OSX_COMPILER', ''):
+ import _osx_support
+ _osx_support.customize_compiler(_config_vars)
+ _config_vars['CUSTOMIZED_OSX_COMPILER'] = 'True'
+
(cc, cxx, opt, cflags, ccshared, ldshared, so_ext, ar, ar_flags) = \
get_config_vars('CC', 'CXX', 'OPT', 'CFLAGS',
'CCSHARED', 'LDSHARED', 'SO', 'AR', 'ARFLAGS')
newcc = None
if 'CC' in os.environ:
- newcc = os.environ['CC']
- elif sys.platform == 'darwin' and cc == 'gcc-4.2':
- # Issue #13590:
- # Since Apple removed gcc-4.2 in Xcode 4.2, we can no
- # longer assume it is available for extension module builds.
- # If Python was built with gcc-4.2, check first to see if
- # it is available on this system; if not, try to use clang
- # instead unless the caller explicitly set CC.
- global _USE_CLANG
- if _USE_CLANG is None:
- from distutils import log
- from subprocess import Popen, PIPE
- p = Popen("! type gcc-4.2 && type clang && exit 2",
- shell=True, stdout=PIPE, stderr=PIPE)
- p.wait()
- if p.returncode == 2:
- _USE_CLANG = True
- log.warn("gcc-4.2 not found, using clang instead")
- else:
- _USE_CLANG = False
- if _USE_CLANG:
- newcc = 'clang'
- if newcc:
- # On OS X, if CC is overridden, use that as the default
- # command for LDSHARED as well
- if (sys.platform == 'darwin'
- and 'LDSHARED' not in os.environ
- and ldshared.startswith(cc)):
- ldshared = newcc + ldshared[len(cc):]
- cc = newcc
+ cc = os.environ['CC']
if 'CXX' in os.environ:
cxx = os.environ['CXX']
if 'LDSHARED' in os.environ:
@@ -232,9 +235,9 @@ def get_config_h_filename():
"""Return full pathname of installed pyconfig.h file."""
if python_build:
if os.name == "nt":
- inc_dir = os.path.join(project_base, "PC")
+ inc_dir = os.path.join(_sys_home or project_base, "PC")
else:
- inc_dir = project_base
+ inc_dir = _sys_home or project_base
else:
inc_dir = get_python_inc(plat_specific=1)
if get_python_version() < '2.2':
@@ -248,7 +251,8 @@ def get_config_h_filename():
def get_makefile_filename():
"""Return full pathname of installed Makefile from the Python build."""
if python_build:
- return os.path.join(os.path.dirname(sys.executable), "Makefile")
+ return os.path.join(_sys_home or os.path.dirname(sys.executable),
+ "Makefile")
lib_dir = get_python_lib(plat_specific=0, standard_lib=1)
config_file = 'config-{}{}'.format(get_python_version(), build_flags)
return os.path.join(lib_dir, config_file, 'Makefile')
@@ -510,7 +514,7 @@ def get_config_vars(*args):
variables relevant for the current platform. Generally this includes
everything needed to build extensions and install both pure modules and
extensions. On Unix, this means every variable defined in Python's
- installed Makefile; on Windows and Mac OS it's a much smaller set.
+ installed Makefile; on Windows it's a much smaller set.
With arguments, return a list of values that result from looking up
each argument in the configuration variable dictionary.
@@ -529,6 +533,23 @@ def get_config_vars(*args):
_config_vars['prefix'] = PREFIX
_config_vars['exec_prefix'] = EXEC_PREFIX
+ # Always convert srcdir to an absolute path
+ srcdir = _config_vars.get('srcdir', project_base)
+ if os.name == 'posix':
+ if python_build:
+ # If srcdir is a relative path (typically '.' or '..')
+ # then it should be interpreted relative to the directory
+ # containing Makefile.
+ base = os.path.dirname(get_makefile_filename())
+ srcdir = os.path.join(base, srcdir)
+ else:
+ # srcdir is not meaningful since the installation is
+ # spread about the filesystem. We choose the
+ # directory containing the Makefile since we know it
+ # exists.
+ srcdir = os.path.dirname(get_makefile_filename())
+ _config_vars['srcdir'] = os.path.abspath(os.path.normpath(srcdir))
+
# Convert srcdir into an absolute path if it appears necessary.
# Normally it is relative to the build directory. However, during
# testing, for example, we might be running a non-installed python
@@ -543,43 +564,11 @@ def get_config_vars(*args):
srcdir = os.path.join(base, _config_vars['srcdir'])
_config_vars['srcdir'] = os.path.normpath(srcdir)
+ # OS X platforms require special customization to handle
+ # multi-architecture, multi-os-version installers
if sys.platform == 'darwin':
- kernel_version = os.uname()[2] # Kernel version (8.4.3)
- major_version = int(kernel_version.split('.')[0])
-
- if major_version < 8:
- # On Mac OS X before 10.4, check if -arch and -isysroot
- # are in CFLAGS or LDFLAGS and remove them if they are.
- # This is needed when building extensions on a 10.3 system
- # using a universal build of python.
- for key in ('LDFLAGS', 'BASECFLAGS',
- # a number of derived variables. These need to be
- # patched up as well.
- 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
- flags = _config_vars[key]
- flags = re.sub('-arch\s+\w+\s', ' ', flags, re.ASCII)
- flags = re.sub('-isysroot [^ \t]*', ' ', flags)
- _config_vars[key] = flags
-
- else:
-
- # Allow the user to override the architecture flags using
- # an environment variable.
- # NOTE: This name was introduced by Apple in OSX 10.5 and
- # is used by several scripting languages distributed with
- # that OS release.
-
- if 'ARCHFLAGS' in os.environ:
- arch = os.environ['ARCHFLAGS']
- for key in ('LDFLAGS', 'BASECFLAGS',
- # a number of derived variables. These need to be
- # patched up as well.
- 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
-
- flags = _config_vars[key]
- flags = re.sub('-arch\s+\w+\s', ' ', flags)
- flags = flags + ' ' + arch
- _config_vars[key] = flags
+ import _osx_support
+ _osx_support.customize_config_vars(_config_vars)
if args:
vals = []
diff --git a/Lib/distutils/tests/test_archive_util.py b/Lib/distutils/tests/test_archive_util.py
index 8edfab4..1afdd46 100644
--- a/Lib/distutils/tests/test_archive_util.py
+++ b/Lib/distutils/tests/test_archive_util.py
@@ -1,6 +1,8 @@
+# -*- coding: utf-8 -*-
"""Tests for distutils.archive_util."""
import unittest
import os
+import sys
import tarfile
from os.path import splitdrive
import warnings
@@ -25,6 +27,18 @@ try:
except ImportError:
ZLIB_SUPPORT = False
+def can_fs_encode(filename):
+ """
+ Return True if the filename can be saved in the file system.
+ """
+ if os.path.supports_unicode_filenames:
+ return True
+ try:
+ filename.encode(sys.getfilesystemencoding())
+ except UnicodeEncodeError:
+ return False
+ return True
+
class ArchiveUtilTestCase(support.TempdirManager,
support.LoggingSilencer,
@@ -32,6 +46,28 @@ class ArchiveUtilTestCase(support.TempdirManager,
@unittest.skipUnless(ZLIB_SUPPORT, 'Need zlib support to run')
def test_make_tarball(self):
+ self._make_tarball('archive')
+
+ @unittest.skipUnless(ZLIB_SUPPORT, 'Need zlib support to run')
+ @unittest.skipUnless(can_fs_encode('årchiv'),
+ 'File system cannot handle this filename')
+ def test_make_tarball_latin1(self):
+ """
+ Mirror test_make_tarball, except filename contains latin characters.
+ """
+ self._make_tarball('årchiv') # note this isn't a real word
+
+ @unittest.skipUnless(ZLIB_SUPPORT, 'Need zlib support to run')
+ @unittest.skipUnless(can_fs_encode('のアーカイブ'),
+ 'File system cannot handle this filename')
+ def test_make_tarball_extended(self):
+ """
+ Mirror test_make_tarball, except filename contains extended
+ characters outside the latin charset.
+ """
+ self._make_tarball('のアーカイブ') # japanese for archive
+
+ def _make_tarball(self, target_name):
# creating something to tar
tmpdir = self.mkdtemp()
self.write_file([tmpdir, 'file1'], 'xxx')
@@ -43,7 +79,7 @@ class ArchiveUtilTestCase(support.TempdirManager,
unittest.skipUnless(splitdrive(tmpdir)[0] == splitdrive(tmpdir2)[0],
"Source and target should be on same drive")
- base_name = os.path.join(tmpdir2, 'archive')
+ base_name = os.path.join(tmpdir2, target_name)
# working with relative paths to avoid tar warnings
old_dir = os.getcwd()
@@ -58,7 +94,7 @@ class ArchiveUtilTestCase(support.TempdirManager,
self.assertTrue(os.path.exists(tarball))
# trying an uncompressed one
- base_name = os.path.join(tmpdir2, 'archive')
+ base_name = os.path.join(tmpdir2, target_name)
old_dir = os.getcwd()
os.chdir(tmpdir)
try:
diff --git a/Lib/distutils/tests/test_bdist_rpm.py b/Lib/distutils/tests/test_bdist_rpm.py
index ab7a1bf..b090b79 100644
--- a/Lib/distutils/tests/test_bdist_rpm.py
+++ b/Lib/distutils/tests/test_bdist_rpm.py
@@ -28,6 +28,11 @@ class BuildRpmTestCase(support.TempdirManager,
unittest.TestCase):
def setUp(self):
+ try:
+ sys.executable.encode("UTF-8")
+ except UnicodeEncodeError:
+ raise unittest.SkipTest("sys.executable is not encodable to UTF-8")
+
super(BuildRpmTestCase, self).setUp()
self.old_location = os.getcwd()
self.old_sys_argv = sys.argv, sys.argv[:]
@@ -42,7 +47,7 @@ class BuildRpmTestCase(support.TempdirManager,
# XXX I am unable yet to make this test work without
# spurious sdtout/stderr output under Mac OS X
- if sys.platform != 'linux2':
+ if not sys.platform.startswith('linux'):
return
# this test will run only if the rpm commands are found
@@ -86,7 +91,7 @@ class BuildRpmTestCase(support.TempdirManager,
# XXX I am unable yet to make this test work without
# spurious sdtout/stderr output under Mac OS X
- if sys.platform != 'linux2':
+ if not sys.platform.startswith('linux'):
return
# http://bugs.python.org/issue1533164
diff --git a/Lib/distutils/tests/test_sysconfig.py b/Lib/distutils/tests/test_sysconfig.py
index fbe26bf..826ea42 100644
--- a/Lib/distutils/tests/test_sysconfig.py
+++ b/Lib/distutils/tests/test_sysconfig.py
@@ -53,6 +53,35 @@ class SysconfigTestCase(support.EnvironGuard,
self.assertTrue(isinstance(cvars, dict))
self.assertTrue(cvars)
+ def test_srcdir(self):
+ # See Issues #15322, #15364.
+ srcdir = sysconfig.get_config_var('srcdir')
+
+ self.assertTrue(os.path.isabs(srcdir), srcdir)
+ self.assertTrue(os.path.isdir(srcdir), srcdir)
+
+ if sysconfig.python_build:
+ # The python executable has not been installed so srcdir
+ # should be a full source checkout.
+ Python_h = os.path.join(srcdir, 'Include', 'Python.h')
+ self.assertTrue(os.path.exists(Python_h), Python_h)
+ self.assertTrue(sysconfig._is_python_source_dir(srcdir))
+ elif os.name == 'posix':
+ self.assertEqual(os.path.dirname(sysconfig.get_makefile_filename()),
+ srcdir)
+
+ def test_srcdir_independent_of_cwd(self):
+ # srcdir should be independent of the current working directory
+ # See Issues #15322, #15364.
+ srcdir = sysconfig.get_config_var('srcdir')
+ cwd = os.getcwd()
+ try:
+ os.chdir('..')
+ srcdir2 = sysconfig.get_config_var('srcdir')
+ finally:
+ os.chdir(cwd)
+ self.assertEqual(srcdir, srcdir2)
+
def test_customize_compiler(self):
# not testing if default compiler is not unix
@@ -102,7 +131,27 @@ class SysconfigTestCase(support.EnvironGuard,
import sysconfig as global_sysconfig
self.assertEqual(global_sysconfig.get_config_var('CFLAGS'), sysconfig.get_config_var('CFLAGS'))
self.assertEqual(global_sysconfig.get_config_var('LDFLAGS'), sysconfig.get_config_var('LDFLAGS'))
- self.assertEqual(global_sysconfig.get_config_var('LDSHARED'),sysconfig.get_config_var('LDSHARED'))
+
+ @unittest.skipIf(sysconfig.get_config_var('CUSTOMIZED_OSX_COMPILER'),'compiler flags customized')
+ def test_sysconfig_compiler_vars(self):
+ # On OS X, binary installers support extension module building on
+ # various levels of the operating system with differing Xcode
+ # configurations. This requires customization of some of the
+ # compiler configuration directives to suit the environment on
+ # the installed machine. Some of these customizations may require
+ # running external programs and, so, are deferred until needed by
+ # the first extension module build. With Python 3.3, only
+ # the Distutils version of sysconfig is used for extension module
+ # builds, which happens earlier in the Distutils tests. This may
+ # cause the following tests to fail since no tests have caused
+ # the global version of sysconfig to call the customization yet.
+ # The solution for now is to simply skip this test in this case.
+ # The longer-term solution is to only have one version of sysconfig.
+
+ import sysconfig as global_sysconfig
+ if sysconfig.get_config_var('CUSTOMIZED_OSX_COMPILER'):
+ return
+ self.assertEqual(global_sysconfig.get_config_var('LDSHARED'), sysconfig.get_config_var('LDSHARED'))
self.assertEqual(global_sysconfig.get_config_var('CC'), sysconfig.get_config_var('CC'))
diff --git a/Lib/distutils/tests/test_util.py b/Lib/distutils/tests/test_util.py
index 1a06d4c..eac9b51 100644
--- a/Lib/distutils/tests/test_util.py
+++ b/Lib/distutils/tests/test_util.py
@@ -13,6 +13,7 @@ from distutils import util # used to patch _environ_checked
from distutils.sysconfig import get_config_vars
from distutils import sysconfig
from distutils.tests import support
+import _osx_support
class UtilTestCase(support.EnvironGuard, unittest.TestCase):
@@ -92,6 +93,7 @@ class UtilTestCase(support.EnvironGuard, unittest.TestCase):
('Darwin Kernel Version 8.11.1: '
'Wed Oct 10 18:23:28 PDT 2007; '
'root:xnu-792.25.20~1/RELEASE_I386'), 'i386'))
+ _osx_support._remove_original_values(get_config_vars())
get_config_vars()['MACOSX_DEPLOYMENT_TARGET'] = '10.3'
get_config_vars()['CFLAGS'] = ('-fno-strict-aliasing -DNDEBUG -g '
@@ -105,6 +107,7 @@ class UtilTestCase(support.EnvironGuard, unittest.TestCase):
sys.maxsize = cursize
# macbook with fat binaries (fat, universal or fat64)
+ _osx_support._remove_original_values(get_config_vars())
get_config_vars()['MACOSX_DEPLOYMENT_TARGET'] = '10.4'
get_config_vars()['CFLAGS'] = ('-arch ppc -arch i386 -isysroot '
'/Developer/SDKs/MacOSX10.4u.sdk '
@@ -113,10 +116,12 @@ class UtilTestCase(support.EnvironGuard, unittest.TestCase):
self.assertEqual(get_platform(), 'macosx-10.4-fat')
+ _osx_support._remove_original_values(get_config_vars())
os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.1'
self.assertEqual(get_platform(), 'macosx-10.4-fat')
+ _osx_support._remove_original_values(get_config_vars())
get_config_vars()['CFLAGS'] = ('-arch x86_64 -arch i386 -isysroot '
'/Developer/SDKs/MacOSX10.4u.sdk '
'-fno-strict-aliasing -fno-common '
@@ -124,18 +129,21 @@ class UtilTestCase(support.EnvironGuard, unittest.TestCase):
self.assertEqual(get_platform(), 'macosx-10.4-intel')
+ _osx_support._remove_original_values(get_config_vars())
get_config_vars()['CFLAGS'] = ('-arch x86_64 -arch ppc -arch i386 -isysroot '
'/Developer/SDKs/MacOSX10.4u.sdk '
'-fno-strict-aliasing -fno-common '
'-dynamic -DNDEBUG -g -O3')
self.assertEqual(get_platform(), 'macosx-10.4-fat3')
+ _osx_support._remove_original_values(get_config_vars())
get_config_vars()['CFLAGS'] = ('-arch ppc64 -arch x86_64 -arch ppc -arch i386 -isysroot '
'/Developer/SDKs/MacOSX10.4u.sdk '
'-fno-strict-aliasing -fno-common '
'-dynamic -DNDEBUG -g -O3')
self.assertEqual(get_platform(), 'macosx-10.4-universal')
+ _osx_support._remove_original_values(get_config_vars())
get_config_vars()['CFLAGS'] = ('-arch x86_64 -arch ppc64 -isysroot '
'/Developer/SDKs/MacOSX10.4u.sdk '
'-fno-strict-aliasing -fno-common '
@@ -144,6 +152,7 @@ class UtilTestCase(support.EnvironGuard, unittest.TestCase):
self.assertEqual(get_platform(), 'macosx-10.4-fat64')
for arch in ('ppc', 'i386', 'x86_64', 'ppc64'):
+ _osx_support._remove_original_values(get_config_vars())
get_config_vars()['CFLAGS'] = ('-arch %s -isysroot '
'/Developer/SDKs/MacOSX10.4u.sdk '
'-fno-strict-aliasing -fno-common '
diff --git a/Lib/distutils/unixccompiler.py b/Lib/distutils/unixccompiler.py
index c70a3cc..094a2f0 100644
--- a/Lib/distutils/unixccompiler.py
+++ b/Lib/distutils/unixccompiler.py
@@ -23,6 +23,9 @@ from distutils.errors import \
DistutilsExecError, CompileError, LibError, LinkError
from distutils import log
+if sys.platform == 'darwin':
+ import _osx_support
+
# XXX Things not currently handled:
# * optimization/debug/warning flags; we just use whatever's in Python's
# Makefile and live with it. Is this adequate? If not, we might
@@ -38,68 +41,6 @@ from distutils import log
# should just happily stuff them into the preprocessor/compiler/linker
# options and carry on.
-def _darwin_compiler_fixup(compiler_so, cc_args):
- """
- This function will strip '-isysroot PATH' and '-arch ARCH' from the
- compile flags if the user has specified one them in extra_compile_flags.
-
- This is needed because '-arch ARCH' adds another architecture to the
- build, without a way to remove an architecture. Furthermore GCC will
- barf if multiple '-isysroot' arguments are present.
- """
- stripArch = stripSysroot = False
-
- compiler_so = list(compiler_so)
- kernel_version = os.uname()[2] # 8.4.3
- major_version = int(kernel_version.split('.')[0])
-
- if major_version < 8:
- # OSX before 10.4.0, these don't support -arch and -isysroot at
- # all.
- stripArch = stripSysroot = True
- else:
- stripArch = '-arch' in cc_args
- stripSysroot = '-isysroot' in cc_args
-
- if stripArch or 'ARCHFLAGS' in os.environ:
- while True:
- try:
- index = compiler_so.index('-arch')
- # Strip this argument and the next one:
- del compiler_so[index:index+2]
- except ValueError:
- break
-
- if 'ARCHFLAGS' in os.environ and not stripArch:
- # User specified different -arch flags in the environ,
- # see also distutils.sysconfig
- compiler_so = compiler_so + os.environ['ARCHFLAGS'].split()
-
- if stripSysroot:
- try:
- index = compiler_so.index('-isysroot')
- # Strip this argument and the next one:
- del compiler_so[index:index+2]
- except ValueError:
- pass
-
- # Check if the SDK that is used during compilation actually exists,
- # the universal build requires the usage of a universal SDK and not all
- # users have that installed by default.
- sysroot = None
- if '-isysroot' in cc_args:
- idx = cc_args.index('-isysroot')
- sysroot = cc_args[idx+1]
- elif '-isysroot' in compiler_so:
- idx = compiler_so.index('-isysroot')
- sysroot = compiler_so[idx+1]
-
- if sysroot and not os.path.isdir(sysroot):
- log.warn("Compiling with an SDK that doesn't seem to exist: %s",
- sysroot)
- log.warn("Please check your Xcode installation")
-
- return compiler_so
class UnixCCompiler(CCompiler):
@@ -168,7 +109,8 @@ class UnixCCompiler(CCompiler):
def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts):
compiler_so = self.compiler_so
if sys.platform == 'darwin':
- compiler_so = _darwin_compiler_fixup(compiler_so, cc_args + extra_postargs)
+ compiler_so = _osx_support.compiler_fixup(compiler_so,
+ cc_args + extra_postargs)
try:
self.spawn(compiler_so + cc_args + [src, '-o', obj] +
extra_postargs)
@@ -247,7 +189,7 @@ class UnixCCompiler(CCompiler):
linker[i] = self.compiler_cxx[i]
if sys.platform == 'darwin':
- linker = _darwin_compiler_fixup(linker, ld_args)
+ linker = _osx_support.compiler_fixup(linker, ld_args)
self.spawn(linker + ld_args)
except DistutilsExecError as msg:
diff --git a/Lib/distutils/util.py b/Lib/distutils/util.py
index bce8402..67d8166 100644
--- a/Lib/distutils/util.py
+++ b/Lib/distutils/util.py
@@ -53,6 +53,10 @@ def get_platform ():
return 'win-ia64'
return sys.platform
+ # Set for cross builds explicitly
+ if "_PYTHON_HOST_PLATFORM" in os.environ:
+ return os.environ["_PYTHON_HOST_PLATFORM"]
+
if os.name != "posix" or not hasattr(os, 'uname'):
# XXX what about the architecture? NT is Intel or Alpha,
# Mac OS is M68k or PPC, etc.
@@ -94,94 +98,10 @@ def get_platform ():
if m:
release = m.group()
elif osname[:6] == "darwin":
- #
- # For our purposes, we'll assume that the system version from
- # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
- # to. This makes the compatibility story a bit more sane because the
- # machine is going to compile and link as if it were
- # MACOSX_DEPLOYMENT_TARGET.
- from distutils.sysconfig import get_config_vars
- cfgvars = get_config_vars()
-
- macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')
-
- if 1:
- # Always calculate the release of the running machine,
- # needed to determine if we can build fat binaries or not.
-
- macrelease = macver
- # Get the system version. Reading this plist is a documented
- # way to get the system version (see the documentation for
- # the Gestalt Manager)
- try:
- f = open('/System/Library/CoreServices/SystemVersion.plist')
- except IOError:
- # We're on a plain darwin box, fall back to the default
- # behaviour.
- pass
- else:
- try:
- m = re.search(
- r'<key>ProductUserVisibleVersion</key>\s*' +
- r'<string>(.*?)</string>', f.read())
- if m is not None:
- macrelease = '.'.join(m.group(1).split('.')[:2])
- # else: fall back to the default behaviour
- finally:
- f.close()
-
- if not macver:
- macver = macrelease
-
- if macver:
- from distutils.sysconfig import get_config_vars
- release = macver
- osname = "macosx"
-
- if (macrelease + '.') >= '10.4.' and \
- '-arch' in get_config_vars().get('CFLAGS', '').strip():
- # The universal build will build fat binaries, but not on
- # systems before 10.4
- #
- # Try to detect 4-way universal builds, those have machine-type
- # 'universal' instead of 'fat'.
-
- machine = 'fat'
- cflags = get_config_vars().get('CFLAGS')
-
- archs = re.findall('-arch\s+(\S+)', cflags)
- archs = tuple(sorted(set(archs)))
-
- if len(archs) == 1:
- machine = archs[0]
- elif archs == ('i386', 'ppc'):
- machine = 'fat'
- elif archs == ('i386', 'x86_64'):
- machine = 'intel'
- elif archs == ('i386', 'ppc', 'x86_64'):
- machine = 'fat3'
- elif archs == ('ppc64', 'x86_64'):
- machine = 'fat64'
- elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
- machine = 'universal'
- else:
- raise ValueError(
- "Don't know machine value for archs=%r"%(archs,))
-
- elif machine == 'i386':
- # On OSX the machine type returned by uname is always the
- # 32-bit variant, even if the executable architecture is
- # the 64-bit variant
- if sys.maxsize >= 2**32:
- machine = 'x86_64'
-
- elif machine in ('PowerPC', 'Power_Macintosh'):
- # Pick a sane name for the PPC architecture.
- machine = 'ppc'
-
- # See 'i386' case
- if sys.maxsize >= 2**32:
- machine = 'ppc64'
+ import _osx_support, distutils.sysconfig
+ osname, release, machine = _osx_support.get_platform_osx(
+ distutils.sysconfig.get_config_vars(),
+ osname, release, machine)
return "%s-%s-%s" % (osname, release, machine)
diff --git a/Lib/doctest.py b/Lib/doctest.py
index e189c8f..3af05fb 100644
--- a/Lib/doctest.py
+++ b/Lib/doctest.py
@@ -458,7 +458,6 @@ class Example:
return hash((self.source, self.want, self.lineno, self.indent,
self.exc_msg))
-
class DocTest:
"""
A collection of doctest examples that should be run in a single
@@ -1367,7 +1366,7 @@ class DocTestRunner:
m = self.__LINECACHE_FILENAME_RE.match(filename)
if m and m.group('name') == self.test.name:
example = self.test.examples[int(m.group('examplenum'))]
- return example.source.splitlines(True)
+ return example.source.splitlines(keepends=True)
else:
return self.save_linecache_getlines(filename, module_globals)
@@ -1413,6 +1412,7 @@ class DocTestRunner:
# Note that the interactive output will go to *our*
# save_stdout, even if that's not the real sys.stdout; this
# allows us to write test cases for the set_trace behavior.
+ save_trace = sys.gettrace()
save_set_trace = pdb.set_trace
self.debugger = _OutputRedirectingPdb(save_stdout)
self.debugger.reset()
@@ -1432,6 +1432,7 @@ class DocTestRunner:
finally:
sys.stdout = save_stdout
pdb.set_trace = save_set_trace
+ sys.settrace(save_trace)
linecache.getlines = self.save_linecache_getlines
sys.displayhook = save_displayhook
if clear_globs:
@@ -1628,8 +1629,8 @@ class OutputChecker:
# Check if we should use diff.
if self._do_a_fancy_diff(want, got, optionflags):
# Split want & got into lines.
- want_lines = want.splitlines(True) # True == keep line ends
- got_lines = got.splitlines(True)
+ want_lines = want.splitlines(keepends=True)
+ got_lines = got.splitlines(keepends=True)
# Use difflib to find their differences.
if optionflags & REPORT_UDIFF:
diff = difflib.unified_diff(want_lines, got_lines, n=2)
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py
new file mode 100644
index 0000000..e9f6e20
--- /dev/null
+++ b/Lib/email/_encoded_words.py
@@ -0,0 +1,221 @@
+""" Routines for manipulating RFC2047 encoded words.
+
+This is currently a package-private API, but will be considered for promotion
+to a public API if there is demand.
+
+"""
+
+# An encoded word looks like this:
+#
+# =?charset[*lang]?cte?encoded_string?=
+#
+# for more information about charset see the charset module. Here it is one
+# of the preferred MIME charset names (hopefully; you never know when parsing).
+# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In
+# theory other letters could be used for other encodings, but in practice this
+# (almost?) never happens. There could be a public API for adding entries
+# to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is
+# Base64. The meaning of encoded_string should be obvious. 'lang' is optional
+# as indicated by the brackets (they are not part of the syntax) but is almost
+# never encountered in practice.
+#
+# The general interface for a CTE decoder is that it takes the encoded_string
+# as its argument, and returns a tuple (cte_decoded_string, defects). The
+# cte_decoded_string is the original binary that was encoded using the
+# specified cte. 'defects' is a list of MessageDefect instances indicating any
+# problems encountered during conversion. 'charset' and 'lang' are the
+# corresponding strings extracted from the EW, case preserved.
+#
+# The general interface for a CTE encoder is that it takes a binary sequence
+# as input and returns the cte_encoded_string, which is an ascii-only string.
+#
+# Each encoder must also supply a length function that takes the binary
+# sequence as its argument and returns the length of the resulting encoded
+# string.
+#
+# The main API functions for the module are decode, which calls the decoder
+# referenced by the cte specifier, and encode, which adds the appropriate
+# RFC 2047 "chrome" to the encoded string, and can optionally automatically
+# select the shortest possible encoding. See their docstrings below for
+# details.
+
+import re
+import base64
+import binascii
+import functools
+from string import ascii_letters, digits
+from email import errors
+
+__all__ = ['decode_q',
+ 'encode_q',
+ 'decode_b',
+ 'encode_b',
+ 'len_q',
+ 'len_b',
+ 'decode',
+ 'encode',
+ ]
+
+#
+# Quoted Printable
+#
+
+# regex based decoder.
+_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub,
+ lambda m: bytes([int(m.group(1), 16)]))
+
+def decode_q(encoded):
+ encoded = encoded.replace(b'_', b' ')
+ return _q_byte_subber(encoded), []
+
+
+# dict mapping bytes to their encoded form
+class _QByteMap(dict):
+
+ safe = b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')
+
+ def __missing__(self, key):
+ if key in self.safe:
+ self[key] = chr(key)
+ else:
+ self[key] = "={:02X}".format(key)
+ return self[key]
+
+_q_byte_map = _QByteMap()
+
+# In headers spaces are mapped to '_'.
+_q_byte_map[ord(' ')] = '_'
+
+def encode_q(bstring):
+ return ''.join(_q_byte_map[x] for x in bstring)
+
+def len_q(bstring):
+ return sum(len(_q_byte_map[x]) for x in bstring)
+
+
+#
+# Base64
+#
+
+def decode_b(encoded):
+ defects = []
+ pad_err = len(encoded) % 4
+ if pad_err:
+ defects.append(errors.InvalidBase64PaddingDefect())
+ padded_encoded = encoded + b'==='[:4-pad_err]
+ else:
+ padded_encoded = encoded
+ try:
+ return base64.b64decode(padded_encoded, validate=True), defects
+ except binascii.Error:
+ # Since we had correct padding, this must be an invalid char error.
+ defects = [errors.InvalidBase64CharactersDefect()]
+ # The non-alphabet characters are ignored as far as padding
+ # goes, but we don't know how many there are. So we'll just
+ # try various padding lengths until something works.
+ for i in 0, 1, 2, 3:
+ try:
+ return base64.b64decode(encoded+b'='*i, validate=False), defects
+ except binascii.Error:
+ if i==0:
+ defects.append(errors.InvalidBase64PaddingDefect())
+ else:
+ # This should never happen.
+ raise AssertionError("unexpected binascii.Error")
+
+def encode_b(bstring):
+ return base64.b64encode(bstring).decode('ascii')
+
+def len_b(bstring):
+ groups_of_3, leftover = divmod(len(bstring), 3)
+ # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
+ return groups_of_3 * 4 + (4 if leftover else 0)
+
+
+_cte_decoders = {
+ 'q': decode_q,
+ 'b': decode_b,
+ }
+
+def decode(ew):
+ """Decode encoded word and return (string, charset, lang, defects) tuple.
+
+ An RFC 2047/2231 encoded word has the form:
+
+ =?charset*lang?cte?encoded_string?=
+
+ where '*lang' may be omitted but the other parts may not be.
+
+ This function expects exactly such a string (that is, it does not check the
+ syntax and may raise errors if the string is not well formed), and returns
+ the encoded_string decoded first from its Content Transfer Encoding and
+ then from the resulting bytes into unicode using the specified charset. If
+ the cte-decoded string does not successfully decode using the specified
+ character set, a defect is added to the defects list and the unknown octets
+ are replaced by the unicode 'unknown' character \uFDFF.
+
+ The specified charset and language are returned. The default for language,
+ which is rarely if ever encountered, is the empty string.
+
+ """
+ _, charset, cte, cte_string, _ = ew.split('?')
+ charset, _, lang = charset.partition('*')
+ cte = cte.lower()
+ # Recover the original bytes and do CTE decoding.
+ bstring = cte_string.encode('ascii', 'surrogateescape')
+ bstring, defects = _cte_decoders[cte](bstring)
+ # Turn the CTE decoded bytes into unicode.
+ try:
+ string = bstring.decode(charset)
+ except UnicodeError:
+ defects.append(errors.UndecodableBytesDefect("Encoded word "
+ "contains bytes not decodable using {} charset".format(charset)))
+ string = bstring.decode(charset, 'surrogateescape')
+ except LookupError:
+ string = bstring.decode('ascii', 'surrogateescape')
+ if charset.lower() != 'unknown-8bit':
+ defects.append(errors.CharsetError("Unknown charset {} "
+ "in encoded word; decoded as unknown bytes".format(charset)))
+ return string, charset, lang, defects
+
+
+_cte_encoders = {
+ 'q': encode_q,
+ 'b': encode_b,
+ }
+
+_cte_encode_length = {
+ 'q': len_q,
+ 'b': len_b,
+ }
+
+def encode(string, charset='utf-8', encoding=None, lang=''):
+ """Encode string using the CTE encoding that produces the shorter result.
+
+ Produces an RFC 2047/2231 encoded word of the form:
+
+ =?charset*lang?cte?encoded_string?=
+
+ where '*lang' is omitted unless the 'lang' parameter is given a value.
+ Optional argument charset (defaults to utf-8) specifies the charset to use
+ to encode the string to binary before CTE encoding it. Optional argument
+ 'encoding' is the cte specifier for the encoding that should be used ('q'
+ or 'b'); if it is None (the default) the encoding which produces the
+ shortest encoded sequence is used, except that 'q' is preferred if it is up
+ to five characters longer. Optional argument 'lang' (default '') gives the
+ RFC 2231 language string to specify in the encoded word.
+
+ """
+ if charset == 'unknown-8bit':
+ bstring = string.encode('ascii', 'surrogateescape')
+ else:
+ bstring = string.encode(charset)
+ if encoding is None:
+ qlen = _cte_encode_length['q'](bstring)
+ blen = _cte_encode_length['b'](bstring)
+ # Bias toward q. 5 is arbitrary.
+ encoding = 'q' if qlen - blen < 5 else 'b'
+ encoded = _cte_encoders[encoding](bstring)
+ if lang:
+ lang = '*' + lang
+ return "=?{}{}?{}?{}?=".format(charset, lang, encoding, encoded)
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
new file mode 100644
index 0000000..1924ed1
--- /dev/null
+++ b/Lib/email/_header_value_parser.py
@@ -0,0 +1,2953 @@
+"""Header value parser implementing various email-related RFC parsing rules.
+
+The parsing methods defined in this module implement various email related
+parsing rules. Principal among them is RFC 5322, which is the follow-on
+to RFC 2822 and primarily a clarification of the former. It also implements
+RFC 2047 encoded word decoding.
+
+RFC 5322 goes to considerable trouble to maintain backward compatibility with
+RFC 822 in the parse phase, while cleaning up the structure on the generation
+phase. This parser supports correct RFC 5322 generation by tagging white space
+as folding white space only when folding is allowed in the non-obsolete rule
+sets. Actually, the parser is even more generous when accepting input than RFC
+5322 mandates, following the spirit of Postel's Law, which RFC 5322 encourages.
+Where possible deviations from the standard are annotated on the 'defects'
+attribute of tokens that deviate.
+
+The general structure of the parser follows RFC 5322, and uses its terminology
+where there is a direct correspondence. Where the implementation requires a
+somewhat different structure than that used by the formal grammar, new terms
+that mimic the closest existing terms are used. Thus, it really helps to have
+a copy of RFC 5322 handy when studying this code.
+
+Input to the parser is a string that has already been unfolded according to
+RFC 5322 rules. According to the RFC this unfolding is the very first step, and
+this parser leaves the unfolding step to a higher level message parser, which
+will have already detected the line breaks that need unfolding while
+determining the beginning and end of each header.
+
+The output of the parser is a TokenList object, which is a list subclass. A
+TokenList is a recursive data structure. The terminal nodes of the structure
+are Terminal objects, which are subclasses of str. These do not correspond
+directly to terminal objects in the formal grammar, but are instead more
+practical higher level combinations of true terminals.
+
+All TokenList and Terminal objects have a 'value' attribute, which produces the
+semantically meaningful value of that part of the parse subtree. The value of
+all whitespace tokens (no matter how many sub-tokens they may contain) is a
+single space, as per the RFC rules. This includes 'CFWS', which is herein
+included in the general class of whitespace tokens. There is one exception to
+the rule that whitespace tokens are collapsed into single spaces in values: in
+the value of a 'bare-quoted-string' (a quoted-string with no leading or
+trailing whitespace), any whitespace that appeared between the quotation marks
+is preserved in the returned value. Note that in all Terminal strings quoted
+pairs are turned into their unquoted values.
+
+All TokenList and Terminal objects also have a string value, which attempts to
+be a "canonical" representation of the RFC-compliant form of the substring that
+produced the parsed subtree, including minimal use of quoted pair quoting.
+Whitespace runs are not collapsed.
+
+Comment tokens also have a 'content' attribute providing the string found
+between the parens (including any nested comments) with whitespace preserved.
+
+All TokenList and Terminal objects have a 'defects' attribute which is a
+possibly empty list of all of the defects found while creating the token. Defects
+may appear on any token in the tree, and a composite list of all defects in the
+subtree is available through the 'all_defects' attribute of any node. (For
+Terminal nodes x.defects == x.all_defects.)
+
+Each object in a parse tree is called a 'token', and each has a 'token_type'
+attribute that gives the name from the RFC 5322 grammar that it represents.
+Not all RFC 5322 nodes are produced, and there is one non-RFC 5322 node that
+may be produced: 'ptext'. A 'ptext' is a string of printable ascii characters.
+It is returned in place of lists of (ctext/quoted-pair) and
+(qtext/quoted-pair).
+
+XXX: provide complete list of token types.
+"""
+
+import re
+import urllib # For urllib.parse.unquote
+from collections import namedtuple, OrderedDict
+from email import _encoded_words as _ew
+from email import errors
+from email import utils
+
+#
+# Useful constants and functions
+#
+
+WSP = set(' \t')
+CFWS_LEADER = WSP | set('(')
+SPECIALS = set(r'()<>@,:;.\"[]')
+ATOM_ENDS = SPECIALS | WSP
+DOT_ATOM_ENDS = ATOM_ENDS - set('.')
+# '.', '"', and '(' do not end phrases in order to support obs-phrase
+PHRASE_ENDS = SPECIALS - set('."(')
+TSPECIALS = (SPECIALS | set('/?=')) - set('.')
+TOKEN_ENDS = TSPECIALS | WSP
+ASPECIALS = TSPECIALS | set("*'%")
+ATTRIBUTE_ENDS = ASPECIALS | WSP
+EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
+
+def quote_string(value):
+ return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
+
+#
+# Accumulator for header folding
+#
+
+class _Folded:
+
+ def __init__(self, maxlen, policy):
+ self.maxlen = maxlen
+ self.policy = policy
+ self.lastlen = 0
+ self.stickyspace = None
+ self.firstline = True
+ self.done = []
+ self.current = []
+
+ def newline(self):
+ self.done.extend(self.current)
+ self.done.append(self.policy.linesep)
+ self.current.clear()
+ self.lastlen = 0
+
+ def finalize(self):
+ if self.current:
+ self.newline()
+
+ def __str__(self):
+ return ''.join(self.done)
+
+ def append(self, stoken):
+ self.current.append(stoken)
+
+ def append_if_fits(self, token, stoken=None):
+ if stoken is None:
+ stoken = str(token)
+ l = len(stoken)
+ if self.stickyspace is not None:
+ stickyspace_len = len(self.stickyspace)
+ if self.lastlen + stickyspace_len + l <= self.maxlen:
+ self.current.append(self.stickyspace)
+ self.lastlen += stickyspace_len
+ self.current.append(stoken)
+ self.lastlen += l
+ self.stickyspace = None
+ self.firstline = False
+ return True
+ if token.has_fws:
+ ws = token.pop_leading_fws()
+ if ws is not None:
+ self.stickyspace += str(ws)
+ stickyspace_len += len(ws)
+ token._fold(self)
+ return True
+ if stickyspace_len and l + 1 <= self.maxlen:
+ margin = self.maxlen - l
+ if 0 < margin < stickyspace_len:
+ trim = stickyspace_len - margin
+ self.current.append(self.stickyspace[:trim])
+ self.stickyspace = self.stickyspace[trim:]
+ stickyspace_len = trim
+ self.newline()
+ self.current.append(self.stickyspace)
+ self.current.append(stoken)
+ self.lastlen = l + stickyspace_len
+ self.stickyspace = None
+ self.firstline = False
+ return True
+ if not self.firstline:
+ self.newline()
+ self.current.append(self.stickyspace)
+ self.current.append(stoken)
+ self.stickyspace = None
+ self.firstline = False
+ return True
+ if self.lastlen + l <= self.maxlen:
+ self.current.append(stoken)
+ self.lastlen += l
+ return True
+ if l < self.maxlen:
+ self.newline()
+ self.current.append(stoken)
+ self.lastlen = l
+ return True
+ return False
+
+#
+# TokenList and its subclasses
+#
+
+class TokenList(list):
+
+ token_type = None
+
+ def __init__(self, *args, **kw):
+ super().__init__(*args, **kw)
+ self.defects = []
+
+ def __str__(self):
+ return ''.join(str(x) for x in self)
+
+ def __repr__(self):
+ return '{}({})'.format(self.__class__.__name__,
+ super().__repr__())
+
+ @property
+ def value(self):
+ return ''.join(x.value for x in self if x.value)
+
+ @property
+ def all_defects(self):
+ return sum((x.all_defects for x in self), self.defects)
+
+ #
+ # Folding API
+ #
+ # parts():
+ #
+ # return a list of objects that constitute the "higher level syntactic
+ # objects" specified by the RFC as the best places to fold a header line.
+ # The returned objects must include leading folding white space, even if
+ # this means mutating the underlying parse tree of the object. Each object
+ # is only responsible for returning *its* parts, and should not drill down
+ # to any lower level except as required to meet the leading folding white
+ # space constraint.
+ #
+ # _fold(folded):
+ #
+ # folded: the result accumulator. This is an instance of _Folded.
+ # (XXX: I haven't finished factoring this out yet, the folding code
+ # pretty much uses this as a state object.) When the folded.current
+ # contains as much text as will fit, the _fold method should call
+ # folded.newline.
+ # folded.lastlen: the current length of the text stored in folded.current.
+ # folded.maxlen: The maximum number of characters that may appear on a
+ # folded line. Differs from the policy setting in that "no limit" is
+ # represented by +inf, which means it can be used in the trivially
+ # logical fashion in comparisons.
+ #
+ # Currently no subclasses implement parts, and I think this will remain
+ # true. A subclass only needs to implement _fold when the generic version
+ # isn't sufficient. _fold will need to be implemented primarily when it is
+ # possible for encoded words to appear in the specialized token-list, since
+ # there is no generic algorithm that can know where exactly the encoded
+ # words are allowed. A _fold implementation is responsible for filling
+ # lines in the same general way that the top level _fold does. It may, and
+ # should, call the _fold method of sub-objects in a similar fashion to that
+ # of the top level _fold.
+ #
+ # XXX: I'm hoping it will be possible to factor the existing code further
+ # to reduce redundancy and make the logic clearer.
+
+ @property
+ def parts(self):
+ klass = self.__class__
+ this = []
+ for token in self:
+ if token.startswith_fws():
+ if this:
+ yield this[0] if len(this)==1 else klass(this)
+ this.clear()
+ end_ws = token.pop_trailing_ws()
+ this.append(token)
+ if end_ws:
+ yield klass(this)
+ this = [end_ws]
+ if this:
+ yield this[0] if len(this)==1 else klass(this)
+
+ def startswith_fws(self):
+ return self[0].startswith_fws()
+
+ def pop_leading_fws(self):
+ if self[0].token_type == 'fws':
+ return self.pop(0)
+ return self[0].pop_leading_fws()
+
+ def pop_trailing_ws(self):
+ if self[-1].token_type == 'cfws':
+ return self.pop(-1)
+ return self[-1].pop_trailing_ws()
+
+ @property
+ def has_fws(self):
+ for part in self:
+ if part.has_fws:
+ return True
+ return False
+
+ def has_leading_comment(self):
+ return self[0].has_leading_comment()
+
+ @property
+ def comments(self):
+ comments = []
+ for token in self:
+ comments.extend(token.comments)
+ return comments
+
+ def fold(self, *, policy):
+ # max_line_length 0/None means no limit, ie: infinitely long.
+ maxlen = policy.max_line_length or float("+inf")
+ folded = _Folded(maxlen, policy)
+ self._fold(folded)
+ folded.finalize()
+ return str(folded)
+
+ def as_encoded_word(self, charset):
+ # This works only for things returned by 'parts', which include
+ # the leading fws, if any, that should be used.
+ res = []
+ ws = self.pop_leading_fws()
+ if ws:
+ res.append(ws)
+ trailer = self.pop(-1) if self[-1].token_type=='fws' else ''
+ res.append(_ew.encode(str(self), charset))
+ res.append(trailer)
+ return ''.join(res)
+
+ def cte_encode(self, charset, policy):
+ res = []
+ for part in self:
+ res.append(part.cte_encode(charset, policy))
+ return ''.join(res)
+
+ def _fold(self, folded):
+ for part in self.parts:
+ tstr = str(part)
+ tlen = len(tstr)
+ try:
+ str(part).encode('us-ascii')
+ except UnicodeEncodeError:
+ if any(isinstance(x, errors.UndecodableBytesDefect)
+ for x in part.all_defects):
+ charset = 'unknown-8bit'
+ else:
+ # XXX: this should be a policy setting
+ charset = 'utf-8'
+ tstr = part.cte_encode(charset, folded.policy)
+ tlen = len(tstr)
+ if folded.append_if_fits(part, tstr):
+ continue
+ # Peel off the leading whitespace if any and make it sticky, to
+ # avoid infinite recursion.
+ ws = part.pop_leading_fws()
+ if ws is not None:
+ # Peel off the leading whitespace and make it sticky, to
+ # avoid infinite recursion.
+ folded.stickyspace = str(part.pop(0))
+ if folded.append_if_fits(part):
+ continue
+ if part.has_fws:
+ part._fold(folded)
+ continue
+ # There are no fold points in this one; it is too long for a single
+ # line and can't be split...we just have to put it on its own line.
+ folded.append(tstr)
+ folded.newline()
+
+ def pprint(self, indent=''):
+ print('\n'.join(self._pp(indent='')))
+
+ def ppstr(self, indent=''):
+ return '\n'.join(self._pp(indent=''))
+
+ def _pp(self, indent=''):
+ yield '{}{}/{}('.format(
+ indent,
+ self.__class__.__name__,
+ self.token_type)
+ for token in self:
+ if not hasattr(token, '_pp'):
+ yield (indent + ' !! invalid element in token '
+ 'list: {!r}'.format(token))
+ else:
+ for line in token._pp(indent+' '):
+ yield line
+ if self.defects:
+ extra = ' Defects: {}'.format(self.defects)
+ else:
+ extra = ''
+ yield '{}){}'.format(indent, extra)
+
+
+class WhiteSpaceTokenList(TokenList):
+
+ @property
+ def value(self):
+ return ' '
+
+ @property
+ def comments(self):
+ return [x.content for x in self if x.token_type=='comment']
+
+
+class UnstructuredTokenList(TokenList):
+
+ token_type = 'unstructured'
+
+ def _fold(self, folded):
+ if any(x.token_type=='encoded-word' for x in self):
+ return self._fold_encoded(folded)
+ # Here we can have either a pure ASCII string that may or may not
+ # have surrogateescape encoded bytes, or a unicode string.
+ last_ew = None
+ for part in self.parts:
+ tstr = str(part)
+ is_ew = False
+ try:
+ str(part).encode('us-ascii')
+ except UnicodeEncodeError:
+ if any(isinstance(x, errors.UndecodableBytesDefect)
+ for x in part.all_defects):
+ charset = 'unknown-8bit'
+ else:
+ charset = 'utf-8'
+ if last_ew is not None:
+ # We've already done an EW, combine this one with it
+ # if there's room.
+ chunk = get_unstructured(
+ ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset)
+ oldlastlen = sum(len(x) for x in folded.current[:last_ew])
+ schunk = str(chunk)
+ lchunk = len(schunk)
+ if oldlastlen + lchunk <= folded.maxlen:
+ del folded.current[last_ew:]
+ folded.append(schunk)
+ folded.lastlen = oldlastlen + lchunk
+ continue
+ tstr = part.as_encoded_word(charset)
+ is_ew = True
+ if folded.append_if_fits(part, tstr):
+ if is_ew:
+ last_ew = len(folded.current) - 1
+ continue
+ if is_ew or last_ew:
+ # It's too big to fit on the line, but since we've
+ # got encoded words we can use encoded word folding.
+ part._fold_as_ew(folded)
+ continue
+ # Peel off the leading whitespace if any and make it sticky, to
+ # avoid infinite recursion.
+ ws = part.pop_leading_fws()
+ if ws is not None:
+ folded.stickyspace = str(ws)
+ if folded.append_if_fits(part):
+ continue
+ if part.has_fws:
+ part.fold(folded)
+ continue
+ # It can't be split...we just have to put it on its own line.
+ folded.append(tstr)
+ folded.newline()
+ last_ew = None
+
+ def cte_encode(self, charset, policy):
+ res = []
+ last_ew = None
+ for part in self:
+ spart = str(part)
+ try:
+ spart.encode('us-ascii')
+ res.append(spart)
+ except UnicodeEncodeError:
+ if last_ew is None:
+ res.append(part.cte_encode(charset, policy))
+ last_ew = len(res)
+ else:
+ tl = get_unstructured(''.join(res[last_ew:] + [spart]))
+ res.append(tl.as_encoded_word())
+ return ''.join(res)
+
+
+class Phrase(TokenList):
+
+ token_type = 'phrase'
+
+ def _fold(self, folded):
+ # As with Unstructured, we can have pure ASCII with or without
+ # surrogateescape encoded bytes, or we could have unicode. But this
+ # case is more complicated, since we have to deal with the various
+ # sub-token types and how they can be composed in the face of
+ # unicode-that-needs-CTE-encoding, and the fact that if a token has a
+ # comment, that becomes a barrier across which we can't compose encoded
+ # words.
+ last_ew = None
+ for part in self.parts:
+ tstr = str(part)
+ tlen = len(tstr)
+ has_ew = False
+ try:
+ str(part).encode('us-ascii')
+ except UnicodeEncodeError:
+ if any(isinstance(x, errors.UndecodableBytesDefect)
+ for x in part.all_defects):
+ charset = 'unknown-8bit'
+ else:
+ charset = 'utf-8'
+ if last_ew is not None and not part.has_leading_comment():
+ # We've already done an EW, let's see if we can combine
+ # this one with it. The last_ew logic ensures that all we
+ # have at this point is atoms, no comments or quoted
+ # strings. So we can treat the text between the last
+ # encoded word and the content of this token as
+ # unstructured text, and things will work correctly. But
+ # we have to strip off any trailing comment on this token
+ # first, and if it is a quoted string we have to pull out
+ # the content (we're encoding it, so it no longer needs to
+ # be quoted).
+ if part[-1].token_type == 'cfws' and part.comments:
+ remainder = part.pop(-1)
+ else:
+ remainder = ''
+ for i, token in enumerate(part):
+ if token.token_type == 'bare-quoted-string':
+ part[i] = UnstructuredTokenList(token[:])
+ chunk = get_unstructured(
+ ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset)
+ schunk = str(chunk)
+ lchunk = len(schunk)
+ if last_ew + lchunk <= folded.maxlen:
+ del folded.current[last_ew:]
+ folded.append(schunk)
+ folded.lastlen = sum(len(x) for x in folded.current)
+ continue
+ tstr = part.as_encoded_word(charset)
+ tlen = len(tstr)
+ has_ew = True
+ if folded.append_if_fits(part, tstr):
+ if has_ew and not part.comments:
+ last_ew = len(folded.current) - 1
+ elif part.comments or part.token_type == 'quoted-string':
+ # If a comment is involved we can't combine EWs. And if a
+ # quoted string is involved, it's not worth the effort to
+ # try to combine them.
+ last_ew = None
+ continue
+ part._fold(folded)
+
+ def cte_encode(self, charset, policy):
+ res = []
+ last_ew = None
+ is_ew = False
+ for part in self:
+ spart = str(part)
+ try:
+ spart.encode('us-ascii')
+ res.append(spart)
+ except UnicodeEncodeError:
+ is_ew = True
+ if last_ew is None:
+ if not part.comments:
+ last_ew = len(res)
+ res.append(part.cte_encode(charset, policy))
+ elif not part.has_leading_comment():
+ if part[-1].token_type == 'cfws' and part.comments:
+ remainder = part.pop(-1)
+ else:
+ remainder = ''
+ for i, token in enumerate(part):
+ if token.token_type == 'bare-quoted-string':
+ part[i] = UnstructuredTokenList(token[:])
+ tl = get_unstructured(''.join(res[last_ew:] + [spart]))
+ res[last_ew:] = [tl.as_encoded_word(charset)]
+ if part.comments or (not is_ew and part.token_type == 'quoted-string'):
+ last_ew = None
+ return ''.join(res)
+
+class Word(TokenList):
+
+ token_type = 'word'
+
+
+class CFWSList(WhiteSpaceTokenList):
+
+ token_type = 'cfws'
+
+ def has_leading_comment(self):
+ return bool(self.comments)
+
+
+class Atom(TokenList):
+
+ token_type = 'atom'
+
+
+class Token(TokenList):
+
+ token_type = 'token'
+
+
+class EncodedWord(TokenList):
+
+ token_type = 'encoded-word'
+ cte = None
+ charset = None
+ lang = None
+
+ @property
+ def encoded(self):
+ if self.cte is not None:
+ return self.cte
+ _ew.encode(str(self), self.charset)
+
+
+
class QuotedString(TokenList):
    """Token list tagged 'quoted-string'."""

    token_type = 'quoted-string'

    @property
    def content(self):
        """Value of the first 'bare-quoted-string' child, or None."""
        return next(
            (tok.value for tok in self
             if tok.token_type == 'bare-quoted-string'),
            None)

    @property
    def quoted_value(self):
        """Join str() of bare-quoted-string children with .value of the rest."""
        return ''.join(
            str(tok) if tok.token_type == 'bare-quoted-string' else tok.value
            for tok in self)

    @property
    def stripped_value(self):
        """Value of the first 'bare-quoted-string' child, or None."""
        return next(
            (tok.value for tok in self
             if tok.token_type == 'bare-quoted-string'),
            None)
+
+
class BareQuotedString(QuotedString):
    """Quoted string tagged 'bare-quoted-string'."""

    token_type = 'bare-quoted-string'

    def __str__(self):
        """Return the children's text joined and passed through quote_string."""
        return quote_string(''.join(map(str, self)))

    @property
    def value(self):
        """The children's text joined, without re-quoting."""
        return ''.join(map(str, self))
+
+
class Comment(WhiteSpaceTokenList):
    """Whitespace token list tagged 'comment'; str() renders it in parens."""

    token_type = 'comment'

    def __str__(self):
        """Return '(' + each child passed through quote() + ')'."""
        return '(' + ''.join(self.quote(x) for x in self) + ')'

    def quote(self, value):
        """Return str(value) with backslashes and parentheses escaped.

        Nested comments are returned via str() unchanged, since their own
        __str__ already applies the escaping.
        """
        if value.token_type == 'comment':
            return str(value)
        # '\\(' is the same two characters the old '\(' literal produced,
        # without relying on an invalid escape sequence.
        return str(value).replace('\\', '\\\\').replace(
            '(', '\\(').replace(
            ')', '\\)')

    @property
    def content(self):
        """The children's text joined, without the enclosing parentheses."""
        return ''.join(str(x) for x in self)

    @property
    def comments(self):
        """A one-element list holding this comment's content."""
        return [self.content]
+
class AddressList(TokenList):
    """Token list tagged 'address-list'."""

    token_type = 'address-list'

    @property
    def addresses(self):
        """The children whose token type is 'address'."""
        found = []
        for entry in self:
            if entry.token_type == 'address':
                found.append(entry)
        return found

    @property
    def mailboxes(self):
        """Concatenation of .mailboxes over every 'address' child."""
        collected = []
        for entry in self:
            if entry.token_type == 'address':
                collected.extend(entry.mailboxes)
        return collected

    @property
    def all_mailboxes(self):
        """Concatenation of .all_mailboxes over every 'address' child."""
        collected = []
        for entry in self:
            if entry.token_type == 'address':
                collected.extend(entry.all_mailboxes)
        return collected
+
+
class Address(TokenList):
    """Token list tagged 'address'; all accessors inspect the first child."""

    token_type = 'address'

    @property
    def display_name(self):
        """The first child's display_name when it is a group, else None."""
        head = self[0]
        return head.display_name if head.token_type == 'group' else None

    @property
    def mailboxes(self):
        """Mailboxes under this address, leaving out an invalid mailbox."""
        head = self[0]
        kind = head.token_type
        if kind == 'mailbox':
            return [head]
        if kind == 'invalid-mailbox':
            return []
        return head.mailboxes

    @property
    def all_mailboxes(self):
        """Mailboxes under this address, including an invalid mailbox."""
        head = self[0]
        if head.token_type in ('mailbox', 'invalid-mailbox'):
            return [head]
        return head.all_mailboxes
+
class MailboxList(TokenList):
    """Token list tagged 'mailbox-list'."""

    token_type = 'mailbox-list'

    @property
    def mailboxes(self):
        """The children whose token type is 'mailbox'."""
        found = []
        for entry in self:
            if entry.token_type == 'mailbox':
                found.append(entry)
        return found

    @property
    def all_mailboxes(self):
        """The children typed 'mailbox' or 'invalid-mailbox'."""
        wanted = ('mailbox', 'invalid-mailbox')
        found = []
        for entry in self:
            if entry.token_type in wanted:
                found.append(entry)
        return found
+
+
class GroupList(TokenList):
    """Token list tagged 'group-list'."""

    token_type = 'group-list'

    @property
    def mailboxes(self):
        """The first child's mailboxes if it is a 'mailbox-list', else []."""
        if self and self[0].token_type == 'mailbox-list':
            return self[0].mailboxes
        return []

    @property
    def all_mailboxes(self):
        """The first child's all_mailboxes if it is a 'mailbox-list', else []."""
        if self and self[0].token_type == 'mailbox-list':
            return self[0].all_mailboxes
        return []
+
+
class Group(TokenList):
    """Token list tagged 'group'.

    The accessors read the display name from child 0 and the group list
    from child 2.
    """

    token_type = "group"

    @property
    def mailboxes(self):
        """Child 2's mailboxes when it is a 'group-list', else []."""
        members = self[2]
        if members.token_type == 'group-list':
            return members.mailboxes
        return []

    @property
    def all_mailboxes(self):
        """Child 2's all_mailboxes when it is a 'group-list', else []."""
        members = self[2]
        if members.token_type == 'group-list':
            return members.all_mailboxes
        return []

    @property
    def display_name(self):
        """The display_name of the first child."""
        name_token = self[0]
        return name_token.display_name
+
+
class NameAddr(TokenList):
    """Token list tagged 'name-addr'.

    Address details are delegated to the last child; the display name
    comes from the first child when there is more than one.
    """

    token_type = 'name-addr'

    @property
    def display_name(self):
        """The first child's display_name, or None for a one-child list."""
        return None if len(self) == 1 else self[0].display_name

    @property
    def local_part(self):
        """local_part of the last child."""
        tail = self[-1]
        return tail.local_part

    @property
    def domain(self):
        """domain of the last child."""
        tail = self[-1]
        return tail.domain

    @property
    def route(self):
        """route of the last child."""
        tail = self[-1]
        return tail.route

    @property
    def addr_spec(self):
        """addr_spec of the last child."""
        tail = self[-1]
        return tail.addr_spec
+
+
class AngleAddr(TokenList):
    """Token list tagged 'angle-addr'."""

    token_type = 'angle-addr'

    @property
    def local_part(self):
        """local_part of the first 'addr-spec' child, or None."""
        return next(
            (tok.local_part for tok in self if tok.token_type == 'addr-spec'),
            None)

    @property
    def domain(self):
        """domain of the first 'addr-spec' child, or None."""
        return next(
            (tok.domain for tok in self if tok.token_type == 'addr-spec'),
            None)

    @property
    def route(self):
        """domains of the first 'obs-route' child, or None."""
        return next(
            (tok.domains for tok in self if tok.token_type == 'obs-route'),
            None)

    @property
    def addr_spec(self):
        """addr_spec of the first 'addr-spec' child, or '<>' if there is none."""
        for tok in self:
            if tok.token_type == 'addr-spec':
                return tok.addr_spec
        return '<>'
+
+
class ObsRoute(TokenList):
    """Token list tagged 'obs-route'."""

    token_type = 'obs-route'

    @property
    def domains(self):
        """The .domain of every child whose token type is 'domain'."""
        collected = []
        for tok in self:
            if tok.token_type == 'domain':
                collected.append(tok.domain)
        return collected
+
+
class Mailbox(TokenList):
    """Token list tagged 'mailbox'; every accessor reads the first child."""

    token_type = 'mailbox'

    @property
    def display_name(self):
        """The first child's display_name if it is a 'name-addr', else None."""
        head = self[0]
        return head.display_name if head.token_type == 'name-addr' else None

    @property
    def local_part(self):
        """local_part of the first child."""
        head = self[0]
        return head.local_part

    @property
    def domain(self):
        """domain of the first child."""
        head = self[0]
        return head.domain

    @property
    def route(self):
        """The first child's route if it is a 'name-addr', else None."""
        head = self[0]
        return head.route if head.token_type == 'name-addr' else None

    @property
    def addr_spec(self):
        """addr_spec of the first child."""
        head = self[0]
        return head.addr_spec
+
+
class InvalidMailbox(TokenList):
    """Token list tagged 'invalid-mailbox'; every address accessor is None."""

    token_type = 'invalid-mailbox'

    @property
    def display_name(self):
        """Always None."""
        return None

    # The remaining accessors are aliases of the same always-None property.
    local_part = domain = route = addr_spec = display_name
+
+
class Domain(TokenList):
    """Token list tagged 'domain'."""

    token_type = 'domain'

    @property
    def domain(self):
        """The inherited value with all whitespace removed."""
        chunks = super().value.split()
        return ''.join(chunks)
+
+
class DotAtom(TokenList):
    """Token list tagged with the token type 'dot-atom'."""

    token_type = 'dot-atom'
+
+
class DotAtomText(TokenList):
    """Token list tagged with the token type 'dot-atom-text'."""

    token_type = 'dot-atom-text'
+
+
class AddrSpec(TokenList):
    """Token list tagged 'addr-spec'.

    A list shorter than three children is treated as having a local part
    only, with no domain.
    """

    token_type = 'addr-spec'

    @property
    def local_part(self):
        """local_part of the first child."""
        return self[0].local_part

    @property
    def domain(self):
        """domain of the last child, or None when there are under 3 children."""
        return self[-1].domain if len(self) >= 3 else None

    @property
    def value(self):
        """Children's values joined, trimming whitespace around child 1."""
        head = self[0].value
        if len(self) < 3:
            return head
        return head.rstrip() + self[1].value + self[2].value.lstrip()

    @property
    def addr_spec(self):
        """local-part[@domain], quoting the local part when required.

        The local part is quoted if it contains any DOT_ATOM_ENDS character.
        """
        local = self.local_part
        if not set(local).isdisjoint(DOT_ATOM_ENDS):
            local = quote_string(local)
        domain = self.domain
        if domain is None:
            return local
        return local + '@' + domain
+
+
class ObsLocalPart(TokenList):
    """Token list tagged with the token type 'obs-local-part'."""

    token_type = 'obs-local-part'
+
+
class DisplayName(Phrase):
    """Phrase tagged 'display-name'."""

    token_type = 'display-name'

    @property
    def display_name(self):
        """The value of a copy of this list with outer CFWS stripped.

        CFWS is dropped whether it is the first/last child itself or the
        first/last element inside that child.
        """
        trimmed = TokenList(self)
        first = trimmed[0]
        if first.token_type == 'cfws':
            trimmed.pop(0)
        elif first[0].token_type == 'cfws':
            trimmed[0] = TokenList(first[1:])
        # Read the tail only after the head has been adjusted.
        last = trimmed[-1]
        if last.token_type == 'cfws':
            trimmed.pop()
        elif last[-1].token_type == 'cfws':
            trimmed[-1] = TokenList(last[:-1])
        return trimmed.value

    @property
    def value(self):
        """The display name, quoted when it has defects or a quoted-string.

        When quoting, leading or trailing CFWS is replaced by a single
        space outside the quotes; otherwise the inherited value is used.
        """
        needs_quoting = bool(self.defects) or any(
            tok.token_type == 'quoted-string' for tok in self)
        if not needs_quoting:
            return super().value
        pre = post = ''
        if self[0].token_type == 'cfws' or self[0][0].token_type == 'cfws':
            pre = ' '
        if self[-1].token_type == 'cfws' or self[-1][-1].token_type == 'cfws':
            post = ' '
        return pre + quote_string(self.display_name) + post
+
+
class LocalPart(TokenList):
    """Token list tagged 'local-part'; both accessors read the first child."""

    token_type = 'local-part'

    @property
    def value(self):
        """quoted_value of a quoted-string first child, else its value."""
        head = self[0]
        if head.token_type == "quoted-string":
            return head.quoted_value
        return head.value

    @property
    def local_part(self):
        """The local part with CFWS removed at the ends and around dots."""
        # DOT sentinels at both ends let the edges be handled by the same
        # "next to a dot" rules as interior tokens; they are sliced off
        # again before the value is taken.
        kept = [DOT]
        prev = DOT
        prev_is_list = False
        for tok in self[0] + [DOT]:
            if tok.token_type == 'cfws':
                continue
            if (prev_is_list and tok.token_type == 'dot'
                    and prev[-1].token_type == 'cfws'):
                # Trailing CFWS of the token just before this dot.
                kept[-1] = TokenList(prev[:-1])
            tok_is_list = isinstance(tok, TokenList)
            if (tok_is_list and prev.token_type == 'dot'
                    and tok[0].token_type == 'cfws'):
                # Leading CFWS of the token just after a dot.
                kept.append(TokenList(tok[1:]))
            else:
                kept.append(tok)
            prev = kept[-1]
            prev_is_list = tok_is_list
        return TokenList(kept[1:-1]).value
+
+
class DomainLiteral(TokenList):
    """Token list tagged 'domain-literal'."""

    token_type = 'domain-literal'

    @property
    def domain(self):
        """The inherited value with all whitespace removed."""
        chunks = super().value.split()
        return ''.join(chunks)

    @property
    def ip(self):
        """Value of the first 'ptext' child, or None."""
        return next(
            (tok.value for tok in self if tok.token_type == 'ptext'),
            None)
+
+
class MIMEVersion(TokenList):
    """Token list tagged 'mime-version'; major and minor default to None."""

    token_type = 'mime-version'
    major = None
    minor = None
+
+
class Parameter(TokenList):
    """Token list tagged 'parameter'.

    Class-level defaults: not sectioned, not extended, charset 'us-ascii'.
    """

    token_type = 'parameter'
    sectioned = False
    extended = False
    charset = 'us-ascii'

    @property
    def section_number(self):
        """The section child's number, or 0 for an unsectioned parameter."""
        # Because the first token, the attribute (name) eats CFWS, the second
        # token is always the section if there is one.
        if self.sectioned:
            return self[1].number
        return 0

    @property
    def param_value(self):
        """stripped_value of the first 'value' token found, or ''.

        A 'value' nested inside quoted-string -> bare-quoted-string is
        also searched.
        """
        # This is part of the "handle quoted extended parameters" hack.
        for token in self:
            if token.token_type == 'value':
                return token.stripped_value
            if token.token_type != 'quoted-string':
                continue
            for quoted_child in token:
                if quoted_child.token_type != 'bare-quoted-string':
                    continue
                for inner in quoted_child:
                    if inner.token_type == 'value':
                        return inner.stripped_value
        return ''
+
+
class InvalidParameter(Parameter):
    """Parameter tagged with the token type 'invalid-parameter'."""

    token_type = 'invalid-parameter'
+
+
class Attribute(TokenList):
    """Token list tagged 'attribute'."""

    token_type = 'attribute'

    @property
    def stripped_value(self):
        """Value of the first child whose type ends in 'attrtext', or None."""
        return next(
            (tok.value for tok in self
             if tok.token_type.endswith('attrtext')),
            None)
+
class Section(TokenList):
    """Token list tagged 'section'; number defaults to None."""

    token_type = 'section'
    number = None
+
+
class Value(TokenList):
    """Token list tagged 'value'."""

    token_type = 'value'

    @property
    def stripped_value(self):
        """The value without surrounding CFWS or quoting.

        The first child is used, or the second when the first is CFWS.  If
        its type ends in quoted-string, attribute or extended-attribute,
        its own stripped_value is returned; otherwise self.value.
        """
        unwrappable = ('quoted-string', 'attribute', 'extended-attribute')
        token = self[1] if self[0].token_type == 'cfws' else self[0]
        if token.token_type.endswith(unwrappable):
            return token.stripped_value
        return self.value
+
+
class MimeParameters(TokenList):
    """Token list tagged 'mime-parameters'."""

    token_type = 'mime-parameters'

    @property
    def params(self):
        """Yield (name, value) pairs, joining sectioned parameter pieces.

        Defects are appended to the individual parameter tokens for
        inconsistent section numbering and for undecodable bytes.
        """
        # The RFC specifically states that the ordering of parameters is not
        # guaranteed and may be reordered by the transport layer.  So we have
        # to assume the RFC 2231 pieces can come in any order.  However, we
        # output them in the order that we first see a given name, which gives
        # us a stable __str__.
        grouped = OrderedDict()
        for token in self:
            if not token.token_type.endswith('parameter'):
                continue
            attribute = token[0]
            if attribute.token_type != 'attribute':
                continue
            name = attribute.value.strip()
            grouped.setdefault(name, []).append((token.section_number, token))
        for name, parts in grouped.items():
            parts = sorted(parts)
            # XXX: there might be more recovery we could do here if, for
            # example, this is really a case of a duplicate attribute name.
            pieces = []
            charset = parts[0][1].charset
            for index, (section_number, param) in enumerate(parts):
                if section_number != index:
                    param.defects.append(errors.InvalidHeaderDefect(
                        "inconsistent multipart parameter numbering"))
                value = param.param_value
                if param.extended:
                    try:
                        raw = urllib.parse.unquote_to_bytes(value)
                    except UnicodeEncodeError:
                        # source had surrogate escaped bytes.  What we do now
                        # is a bit of an open question.  I'm not sure this is
                        # the best choice, but it is what the old algorithm did
                        value = urllib.parse.unquote(value, encoding='latin-1')
                    else:
                        try:
                            value = raw.decode(charset, 'surrogateescape')
                        except LookupError:
                            # XXX: there should really be a custom defect for
                            # unknown character set to make it easy to find,
                            # because otherwise unknown charset is a silent
                            # failure.
                            value = raw.decode('us-ascii', 'surrogateescape')
                        if utils._has_surrogates(value):
                            param.defects.append(
                                errors.UndecodableBytesDefect())
                pieces.append(value)
            yield name, ''.join(pieces)

    def __str__(self):
        """Render as ' name=value; name2' with values quoted, or ''."""
        rendered = [
            '{}={}'.format(name, quote_string(value)) if value else name
            for name, value in self.params]
        joined = '; '.join(rendered)
        return ' ' + joined if joined else ''
+
+
class ParameterizedHeaderValue(TokenList):
    """Base for header values that may end in a 'mime-parameters' token."""

    @property
    def params(self):
        """params of the last 'mime-parameters' child, or {} if there is none."""
        for token in self[::-1]:
            if token.token_type == 'mime-parameters':
                return token.params
        return {}

    @property
    def parts(self):
        """The parts, with a trailing parameter list spliced in inline."""
        if not self or self[-1].token_type != 'mime-parameters':
            return TokenList(self).parts
        # We don't want to start a new line if all of the params don't fit
        # after the value, so unwrap the parameter list.
        return TokenList(self[:-1] + self[-1])
+
+
class ContentType(ParameterizedHeaderValue):
    """Header value tagged 'content-type'; defaults to text/plain."""

    token_type = 'content-type'
    maintype = 'text'
    subtype = 'plain'
+
+
class ContentDisposition(ParameterizedHeaderValue):
    """Header value tagged 'content-disposition'; disposition defaults to None."""

    token_type = 'content-disposition'
    content_disposition = None
+
+
class ContentTransferEncoding(TokenList):
    """Token list tagged 'content-transfer-encoding'; cte defaults to '7bit'."""

    token_type = 'content-transfer-encoding'
    cte = '7bit'
+
+
class HeaderLabel(TokenList):
    """Token list tagged with the token type 'header-label'."""

    token_type = 'header-label'
+
+
class Header(TokenList):
    """Token list tagged 'header'."""

    token_type = 'header'

    def _fold(self, folded):
        """Fold this header into *folded*, consuming this list's children.

        Children are popped in order: the first is appended as text, an
        optional CFWS becomes the sticky space, and the one after that is
        folded.  Anything left over raises ValueError.
        """
        label = str(self.pop(0))
        folded.append(label)
        folded.lastlen = len(folded.current[0])
        # The first line of the header is different from all others: we don't
        # want to start a new object on a new line if it has any fold points in
        # it that would allow part of it to be on the first header line.
        # Further, if the first fold point would fit on the new line, we want
        # to do that, but if it doesn't we want to put it on the first line.
        # Folded supports this via the stickyspace attribute.  If this
        # attribute is not None, it does the special handling.
        if self[0].token_type == 'cfws':
            folded.stickyspace = str(self.pop(0))
        else:
            folded.stickyspace = ''
        body = self.pop(0)
        if self:
            raise ValueError("Malformed Header token list")
        body._fold(folded)
+
+
+#
+# Terminal classes and instances
+#
+
class Terminal(str):
    """A str subclass carrying a token_type and its own defects list."""

    def __new__(cls, value, token_type):
        """Build the string and attach token_type and an empty defects list."""
        obj = super().__new__(cls, value)
        obj.token_type = token_type
        obj.defects = []
        return obj

    def __repr__(self):
        """Return 'ClassName(<repr of the plain string>)'."""
        text_repr = super().__repr__()
        return "{}({})".format(self.__class__.__name__, text_repr)

    @property
    def all_defects(self):
        """A fresh copy of this terminal's defects."""
        return list(self.defects)

    def _pp(self, indent=''):
        """Return a one-element list holding a debugging line for this token."""
        suffix = ' {}'.format(self.defects) if self.defects else ''
        line = "{}{}/{}({}){}".format(
            indent,
            self.__class__.__name__,
            self.token_type,
            super().__repr__(),
            suffix,
        )
        return [line]

    def cte_encode(self, charset, policy):
        """Return the text unchanged if ASCII, else as an encoded word."""
        text = str(self)
        try:
            text.encode('us-ascii')
        except UnicodeEncodeError:
            return _ew.encode(text, charset)
        return text

    def pop_trailing_ws(self):
        """Return None; a terminal has nothing to pop."""
        # This terminates the recursion.
        return None

    def pop_leading_fws(self):
        """Return None; a terminal has nothing to pop."""
        # This terminates the recursion.
        return None

    @property
    def comments(self):
        """Always an empty list."""
        return []

    def has_leading_comment(self):
        """Always False."""
        return False

    def __getnewargs__(self):
        """Return the (value, token_type) pair that __new__ expects."""
        return (str(self), self.token_type)
+
+
class WhiteSpaceTerminal(Terminal):
    """Terminal whose value is always a single space."""

    has_fws = True

    @property
    def value(self):
        """A single space, whatever text the terminal holds."""
        return ' '

    def startswith_fws(self):
        """Always True."""
        return True
+
+
class ValueTerminal(Terminal):
    """Terminal whose value is the terminal itself."""

    has_fws = False

    @property
    def value(self):
        """This terminal (a str subclass instance)."""
        return self

    def startswith_fws(self):
        """Always False."""
        return False

    def as_encoded_word(self, charset):
        """Return this terminal's text encoded via _ew.encode."""
        text = str(self)
        return _ew.encode(text, charset)
+
+
class EWWhiteSpaceTerminal(WhiteSpaceTerminal):
    """Whitespace terminal whose value and str() are both empty.

    The original text remains available through the encoded property.
    """

    has_fws = True

    @property
    def value(self):
        """Always the empty string."""
        return ''

    @property
    def encoded(self):
        """The underlying text, as returned by slicing the whole string."""
        return self[:]

    def __str__(self):
        """Always the empty string."""
        return ''
+
+
# XXX these need to become classes and used as instances so
# that a program can't change them in a parse tree and screw
# up other parse trees.  Maybe should have tests for that, too.
# Until then each name below is a single module-level object; for example
# get_dot_atom_text appends DOT itself (not a copy) to the lists it builds.
DOT = ValueTerminal('.', 'dot')
ListSeparator = ValueTerminal(',', 'list-separator')
RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
+
+#
+# Parser
+#
+
+"""Parse strings according to RFC822/2047/2822/5322 rules.
+
+This is a stateless parser. Each get_XXX function accepts a string and
+returns either a Terminal or a TokenList representing the RFC object named
+by the method and a string containing the remaining unparsed characters
+from the input. Thus a parser method consumes the next syntactic construct
+of a given type and returns a token representing the construct plus the
+unparsed remainder of the input string.
+
+For example, if the first element of a structured header is a 'phrase',
+then:
+
+ phrase, value = get_phrase(value)
+
+returns the complete phrase from the start of the string value, plus any
+characters left in the string after the phrase is removed.
+
+"""
+
# Bound methods of precompiled patterns used by the parser functions below.
#
# _wsp_splitter splits on runs of WSP characters; the capture group keeps
# each whitespace run in the result list.
# Each _non_*_end_matcher matches a leading run of characters that are NOT in
# the named *_ENDS collection.  Backslash and ']' are escaped so they are
# taken literally inside the character class; r'\]' replaces the former
# '\]' literal, which was an invalid escape sequence.
_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
_non_atom_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(ATOM_ENDS).replace('\\', '\\\\').replace(']', r'\]'))).match
# Finds every ASCII control character, space, and DEL.
_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall
_non_token_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(TOKEN_ENDS).replace('\\', '\\\\').replace(']', r'\]'))).match
_non_attribute_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(ATTRIBUTE_ENDS).replace('\\', '\\\\').replace(']', r'\]'))).match
_non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format(
    ''.join(EXTENDED_ATTRIBUTE_ENDS).replace(
        '\\', '\\\\').replace(']', r'\]'))).match
+
def _validate_xtext(xtext):
    """Append defects to *xtext* for non-printables and surrogates.

    A NonPrintableDefect is added when _non_printable_finder finds any
    matches, and an UndecodableBytesDefect when the token has surrogates.
    """
    offenders = _non_printable_finder(xtext)
    if offenders:
        defect = errors.NonPrintableDefect(offenders)
        xtext.defects.append(defect)
    if utils._has_surrogates(xtext):
        defect = errors.UndecodableBytesDefect(
            "Non-ASCII characters found in header token")
        xtext.defects.append(defect)
+
def _get_ptext_to_endchars(value, endchars):
    """Scan printables/quoted-pairs until endchars and return unquoted ptext.

    This function turns a run of qcontent, ccontent-without-comments, or
    dtext-with-quoted-printables into a single string by unquoting any
    quoted printables.  Only the text up to the first run of whitespace is
    scanned.

    Returns a 3-tuple: the unquoted string, the remaining value (starting
    at the end character or whitespace that stopped the scan), and the
    had_qp flag, which is set when an escaped backslash was decoded.

    An empty value, or one that starts with whitespace, yields an empty
    string and the value unchanged.
    """
    fragment, *remainder = _wsp_splitter(value, 1)
    vchars = []
    escape = False
    had_qp = False
    # Index where the unconsumed part of fragment starts.  Defaulting to
    # len(fragment) covers both "no end character found" and an empty
    # fragment, which previously left the loop variable unbound.
    stop = len(fragment)
    for pos, char in enumerate(fragment):
        if char == '\\':
            if escape:
                escape = False
                had_qp = True
            else:
                escape = True
                continue
        if escape:
            # The character after a backslash is taken literally, even if
            # it is one of endchars.
            escape = False
        elif char in endchars:
            stop = pos
            break
        vchars.append(char)
    return ''.join(vchars), ''.join([fragment[stop:]] + remainder), had_qp
+
def _decode_ew_run(value):
    """Decode a leading run of RFC 2047 encoded words.

    _decode_ew_run(value) -> (text, value, defects)

    Whitespace-separated tokens are consumed for as long as each one starts
    with '=?' and ends with '?='.  The decoded texts are concatenated, so
    whitespace between encoded words is discarded.  The returned value is
    whatever was not consumed, including the whitespace that preceded it,
    and defects collects what the decoder reported.  The input value may
    not have any leading whitespace.
    """
    decoded = []
    defects = []
    pending_ws = ''
    while value:
        split = _wsp_splitter(value, 1)
        if len(split) == 3:
            tok, ws, value = split
        else:
            # No whitespace left: the rest of the input is one token.
            tok, ws, value = value, '', ''
        looks_encoded = tok.startswith('=?') and tok.endswith('?=')
        if not looks_encoded:
            return ''.join(decoded), pending_ws + tok + ws + value, defects
        text, charset, lang, new_defects = _ew.decode(tok)
        decoded.append(text)
        defects.extend(new_defects)
        pending_ws = ws
    return ''.join(decoded), pending_ws, defects
+
def get_fws(value):
    """FWS = 1*WSP

    This isn't the RFC definition.  We're using fws to represent tokens
    where folding can be done, but when we are parsing the *un*folding has
    already been done so we don't need to watch out for CRLF.

    Returns the leading whitespace as a WhiteSpaceTerminal typed 'fws',
    plus the rest of the value.
    """
    stripped = value.lstrip()
    ws_length = len(value) - len(stripped)
    return WhiteSpaceTerminal(value[:ws_length], 'fws'), stripped
+
def get_encoded_word(value):
    """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="

    Returns the EncodedWord token and the unparsed remainder.  Raises
    HeaderParseError when value does not start with '=?', has no closing
    '?=', or the decoder rejects the word.
    """
    ew = EncodedWord()
    if not value.startswith('=?'):
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    body = value[2:]
    tok, *remainder = body.split('?=', 1)
    if tok == body:
        # Nothing was split off, so there is no terminating '?='.
        raise errors.HeaderParseError(
            "expected encoded word but found {}".format(value))
    after = ''.join(remainder)
    if after[:2].isdigit():
        # The '?=' that was found is followed by two digits, so extend the
        # word up to the next '?='.
        rest, *remainder = after.split('?=', 1)
        tok = tok + '?=' + rest
    if len(tok.split()) > 1:
        ew.defects.append(errors.InvalidHeaderDefect(
            "whitespace inside encoded word"))
    ew.cte = value
    value = ''.join(remainder)
    try:
        text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
    except ValueError:
        raise errors.HeaderParseError(
            "encoded word format invalid: '{}'".format(ew.cte))
    ew.charset = charset
    ew.lang = lang
    ew.defects.extend(defects)
    # Split the decoded text into alternating fws and vtext terminals.
    while text:
        if text[0] in WSP:
            token, text = get_fws(text)
        else:
            chars, *remainder = _wsp_splitter(text, 1)
            token = ValueTerminal(chars, 'vtext')
            _validate_xtext(token)
            text = ''.join(remainder)
        ew.append(token)
    return ew, value
+
def get_unstructured(value):
    """unstructured = (*([FWS] vchar) *WSP) / obs-unstruct
       obs-unstruct = *((*LF *CR *(obs-utext) *LF *CR)) / FWS)
       obs-utext = %d0 / obs-NO-WS-CTL / LF / CR

       obs-NO-WS-CTL is control characters except WSP/CR/LF.

    The value is parsed into whitespace-separated runs: each run becomes
    either an encoded-word token or a 'vtext' terminal, with 'fws' tokens
    for the whitespace between them.  Non-printable characters are not
    split out of a run; they are reported as defects on the terminal.

    Because an 'unstructured' value must by definition constitute the
    entire value, this 'get' routine does not return a remaining value,
    only the parsed TokenList.
    """
    # XXX: but what about bare CR and LF?  They might signal the start or
    # end of an encoded word.  YAGNI for now, since our current parsers
    # will never send us strings with bare CR or LF.

    unstructured = UnstructuredTokenList()
    while value:
        if value[0] in WSP:
            token, value = get_fws(value)
            unstructured.append(token)
            continue
        if value.startswith('=?'):
            try:
                token, value = get_encoded_word(value)
            except errors.HeaderParseError:
                # Not a valid encoded word: fall through and treat the run
                # as ordinary vtext.
                pass
            else:
                preceded_by_ws = True
                if len(unstructured) > 0:
                    if unstructured[-1].token_type != 'fws':
                        unstructured.defects.append(
                            errors.InvalidHeaderDefect(
                                "missing whitespace before encoded word"))
                        preceded_by_ws = False
                if preceded_by_ws and len(unstructured) > 1:
                    if unstructured[-2].token_type == 'encoded-word':
                        # Whitespace between two encoded words is swapped
                        # for a terminal whose value is empty.
                        unstructured[-1] = EWWhiteSpaceTerminal(
                            unstructured[-1], 'fws')
                unstructured.append(token)
                continue
        run, *remainder = _wsp_splitter(value, 1)
        vtext = ValueTerminal(run, 'vtext')
        _validate_xtext(vtext)
        unstructured.append(vtext)
        value = ''.join(remainder)
    return unstructured
+
def get_qp_ctext(value):
    r"""ctext = <printable ascii except \ ( )>

    This is not the RFC ctext, since we are handling nested comments in
    comment and unquoting quoted-pairs here.  We allow anything except the
    '()' characters, but if we find any ASCII other than the RFC defined
    printable ASCII a NonPrintableDefect is added to the token's defects
    list.  Since quoted pairs are converted to their unquoted values, what
    is returned is a 'ptext' token.  In this case it is a
    WhiteSpaceTerminal, so its value is ' '.

    Returns the ptext token and the remaining value.
    """
    # The docstring is a raw string because it contains a literal
    # backslash, which is an invalid escape sequence in a normal literal.
    ptext, value, _ = _get_ptext_to_endchars(value, '()')
    ptext = WhiteSpaceTerminal(ptext, 'ptext')
    _validate_xtext(ptext)
    return ptext, value
+
def get_qcontent(value):
    """qcontent = qtext / quoted-pair

    Everything up to the next DQUOTE (or whitespace) is accepted.  Quoted
    pairs are converted to their unquoted values, so the result is a
    'ptext' ValueTerminal; _validate_xtext records defects on it for any
    non-printable characters.

    Returns the ptext token and the remaining value.
    """
    text, remaining, _ = _get_ptext_to_endchars(value, '"')
    token = ValueTerminal(text, 'ptext')
    _validate_xtext(token)
    return token, remaining
+
def get_atext(value):
    """atext = <matches _atext_matcher>

    Any leading run of non-ATOM_ENDS characters is accepted as atext;
    _validate_xtext records defects on the token for non-printables.
    Raises HeaderParseError when value does not start with such a run.
    """
    match = _non_atom_end_matcher(value)
    if match is None:
        raise errors.HeaderParseError(
            "expected atext but found '{}'".format(value))
    run = match.group()
    token = ValueTerminal(run, 'atext')
    _validate_xtext(token)
    return token, value[len(run):]
+
def get_bare_quoted_string(value):
    """bare-quoted-string = DQUOTE *([FWS] qcontent) [FWS] DQUOTE

    A quoted-string without the leading or trailing white space.  Its
    value is the text between the quote marks, with whitespace
    preserved and quoted pairs decoded.

    Returns the BareQuotedString token and the value after the closing
    quote.  If the input ends before a closing quote, an
    InvalidHeaderDefect is recorded on the token.  Raises HeaderParseError
    when value is empty or does not start with DQUOTE.
    """
    # Checking for an empty value first turns what used to be an IndexError
    # into the HeaderParseError that callers already handle.
    if not value or value[0] != '"':
        raise errors.HeaderParseError(
            "expected '\"' but found '{}'".format(value))
    bare_quoted_string = BareQuotedString()
    value = value[1:]
    while value and value[0] != '"':
        if value[0] in WSP:
            token, value = get_fws(value)
        else:
            token, value = get_qcontent(value)
        bare_quoted_string.append(token)
    if not value:
        bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
            "end of header inside quoted string"))
        return bare_quoted_string, value
    return bare_quoted_string, value[1:]
+
def get_comment(value):
    """comment = "(" *([FWS] ccontent) [FWS] ")"
       ccontent = ctext / quoted-pair / comment

    Nested comments are parsed recursively; quoted pairs are handled by
    get_qp_ctext.  If the input ends before the closing ')', an
    InvalidHeaderDefect is recorded on the comment.  Raises
    HeaderParseError when a non-empty value does not start with '('.
    """
    if value and value[0] != '(':
        raise errors.HeaderParseError(
            "expected '(' but found '{}'".format(value))
    comment = Comment()
    value = value[1:]
    while value and value[0] != ")":
        lead = value[0]
        if lead in WSP:
            parse = get_fws
        elif lead == '(':
            parse = get_comment
        else:
            parse = get_qp_ctext
        token, value = parse(value)
        comment.append(token)
    if value:
        # Skip the closing parenthesis.
        return comment, value[1:]
    comment.defects.append(errors.InvalidHeaderDefect(
        "end of header inside comment"))
    return comment, value
+
def get_cfws(value):
    """CFWS = (1*([FWS] comment) [FWS]) / FWS

    Collects consecutive whitespace runs and comments into a CFWSList and
    returns it with the remaining value.
    """
    cfws = CFWSList()
    while value and value[0] in CFWS_LEADER:
        parse = get_fws if value[0] in WSP else get_comment
        token, value = parse(value)
        cfws.append(token)
    return cfws, value
+
def get_quoted_string(value):
    """quoted-string = [CFWS] <bare-quoted-string> [CFWS]

    'bare-quoted-string' is an intermediate class defined by this
    parser and not by the RFC grammar.  It is the quoted string
    without any attached CFWS.
    """
    quoted_string = QuotedString()

    def take_optional_cfws(text):
        # Append a CFWS token when text starts with one; return the rest.
        if text and text[0] in CFWS_LEADER:
            cfws, text = get_cfws(text)
            quoted_string.append(cfws)
        return text

    value = take_optional_cfws(value)
    token, value = get_bare_quoted_string(value)
    quoted_string.append(token)
    value = take_optional_cfws(value)
    return quoted_string, value
+
def get_atom(value):
    """atom = [CFWS] 1*atext [CFWS]

    Raises HeaderParseError when, after any leading CFWS, the value starts
    with an ATOM_ENDS character or contains no atext.
    """
    atom = Atom()

    def take_optional_cfws(text):
        # Append a CFWS token when text starts with one; return the rest.
        if text and text[0] in CFWS_LEADER:
            cfws, text = get_cfws(text)
            atom.append(cfws)
        return text

    value = take_optional_cfws(value)
    if value and value[0] in ATOM_ENDS:
        raise errors.HeaderParseError(
            "expected atom but found '{}'".format(value))
    token, value = get_atext(value)
    atom.append(token)
    value = take_optional_cfws(value)
    return atom, value
+
def get_dot_atom_text(value):
    """ dot-text = 1*atext *("." 1*atext)

    Raises HeaderParseError when value is empty, starts with an ATOM_ENDS
    character, or the parsed text ends with a dot.
    """
    if not value or value[0] in ATOM_ENDS:
        raise errors.HeaderParseError("expected atom at a start of "
            "dot-atom-text but found '{}'".format(value))
    dot_atom_text = DotAtomText()
    while value and value[0] not in ATOM_ENDS:
        token, value = get_atext(value)
        dot_atom_text.append(token)
        if value[:1] == '.':
            dot_atom_text.append(DOT)
            value = value[1:]
    last = dot_atom_text[-1]
    if last is DOT:
        raise errors.HeaderParseError("expected atom at end of dot-atom-text "
            "but found '{}'".format('.'+value))
    return dot_atom_text, value
+
def get_dot_atom(value):
    """ dot-atom = [CFWS] dot-atom-text [CFWS]

    Returns the DotAtom token and the remaining value.  Any
    HeaderParseError from get_dot_atom_text propagates; this includes an
    empty value, which used to fail with IndexError here instead.
    """
    dot_atom = DotAtom()
    # Guard the index the same way get_atom and get_quoted_string do.
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        dot_atom.append(token)
    token, value = get_dot_atom_text(value)
    dot_atom.append(token)
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        dot_atom.append(token)
    return dot_atom, value
+
def get_word(value):
    """word = atom / quoted-string

    Either atom or quoted-string may start with CFWS.  We have to peel off
    this CFWS first to determine which type of word to parse.  Afterward we
    splice the leading CFWS, if any, into the parsed sub-token.

    If neither an atom or a quoted-string is found before the next special,
    a HeaderParseError is raised.  The same error is raised when nothing is
    left after the leading CFWS (or value is empty to begin with).

    The token returned is either an Atom or a QuotedString, as appropriate.
    This means the 'word' level of the formal grammar is not represented in
    the parse tree; this is because having that extra layer when
    manipulating the parse tree is more confusing than it is helpful.
    """
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    else:
        leader = None
    if not value:
        # Previously value[0] below raised IndexError, which callers that
        # catch HeaderParseError did not handle.
        raise errors.HeaderParseError(
            "Expected 'atom' or 'quoted-string' but found nothing.")
    if value[0] == '"':
        token, value = get_quoted_string(value)
    elif value[0] in SPECIALS:
        raise errors.HeaderParseError("Expected 'atom' or 'quoted-string' "
                                      "but found '{}'".format(value))
    else:
        token, value = get_atom(value)
    if leader is not None:
        token[:0] = [leader]
    return token, value
+
def get_phrase(value):
    """ phrase = 1*word / obs-phrase
        obs-phrase = word *(word / "." / CFWS)

    A phrase is parsed as a sequence of words, periods, and CFWS, stopping
    at the first PHRASE_ENDS character.  Defects recorded on the phrase:

      - InvalidHeaderDefect if it does not start with a word;
      - ObsoleteHeaderDefect for each period;
      - ObsoleteHeaderDefect for CFWS found where a word was expected.

    A HeaderParseError from a later word is re-raised unless the value
    starts with CFWS at that point.
    """
    phrase = Phrase()
    try:
        token, value = get_word(value)
    except errors.HeaderParseError:
        phrase.defects.append(errors.InvalidHeaderDefect(
            "phrase does not start with word"))
    else:
        phrase.append(token)
    while value and value[0] not in PHRASE_ENDS:
        if value[0] == '.':
            phrase.append(DOT)
            phrase.defects.append(errors.ObsoleteHeaderDefect(
                "period in 'phrase'"))
            value = value[1:]
            continue
        try:
            token, value = get_word(value)
        except errors.HeaderParseError:
            if value[0] not in CFWS_LEADER:
                raise
            token, value = get_cfws(value)
            phrase.defects.append(errors.ObsoleteHeaderDefect(
                "comment found without atom"))
        phrase.append(token)
    return phrase, value
+
def get_local_part(value):
    """ local-part = dot-atom / quoted-string / obs-local-part

    Returns the LocalPart token and the remaining value.  Raises
    HeaderParseError when nothing is left after optional leading CFWS
    (including an empty value), or when no local part can be parsed.

    Defects recorded on the token: ObsoleteHeaderDefect or
    InvalidHeaderDefect when the obs-local-part fallback is used, and
    NonASCIILocalPartDefect when the value is not ASCII.
    """
    local_part = LocalPart()
    leader = None
    # Test for emptiness before indexing so an empty value reaches the
    # HeaderParseError below instead of raising IndexError.
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        raise errors.HeaderParseError(
            "expected local-part but found '{}'".format(value))
    try:
        token, value = get_dot_atom(value)
    except errors.HeaderParseError:
        try:
            token, value = get_word(value)
        except errors.HeaderParseError:
            if value[0] != '\\' and value[0] in PHRASE_ENDS:
                raise
            # Start from an empty token and let the obs-local-part pass
            # below consume the input.
            token = TokenList()
    if leader is not None:
        token[:0] = [leader]
    local_part.append(token)
    if value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
        # More local-part text follows: reparse what we have plus the rest
        # using the obsolete grammar.
        obs_local_part, value = get_obs_local_part(str(local_part) + value)
        if obs_local_part.token_type == 'invalid-obs-local-part':
            local_part.defects.append(errors.InvalidHeaderDefect(
                "local-part is not dot-atom, quoted-string, or obs-local-part"))
        else:
            local_part.defects.append(errors.ObsoleteHeaderDefect(
                "local-part is not a dot-atom (contains CFWS)"))
        local_part[0] = obs_local_part
    try:
        local_part.value.encode('ascii')
    except UnicodeEncodeError:
        # Message corrected: it used to end with a stray ')'.
        local_part.defects.append(errors.NonASCIILocalPartDefect(
                "local-part contains non-ASCII characters"))
    return local_part, value
+
def get_obs_local_part(value):
    """ obs-local-part = word *("." word)

    Returns the ObsLocalPart token and the remaining value.  Each problem
    found (repeated, leading or trailing dots, a backslash outside a quoted
    string, words with no dot between them) is recorded as an
    InvalidHeaderDefect, and a token with any defect has its token_type
    changed to 'invalid-obs-local-part'.

    Raises HeaderParseError when nothing could be parsed, or when a word
    fails to parse at a point where the value does not start with CFWS.
    """
    obs_local_part = ObsLocalPart()
    last_non_ws_was_dot = False
    while value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
        if value[0] == '.':
            if last_non_ws_was_dot:
                obs_local_part.defects.append(errors.InvalidHeaderDefect(
                    "invalid repeated '.'"))
            obs_local_part.append(DOT)
            last_non_ws_was_dot = True
            value = value[1:]
            continue
        elif value[0]=='\\':
            obs_local_part.append(ValueTerminal(value[0],
                                                'misplaced-special'))
            value = value[1:]
            obs_local_part.defects.append(errors.InvalidHeaderDefect(
                "'\\' character outside of quoted-string/ccontent"))
            last_non_ws_was_dot = False
            continue
        if obs_local_part and obs_local_part[-1].token_type != 'dot':
            obs_local_part.defects.append(errors.InvalidHeaderDefect(
                "missing '.' between words"))
        try:
            token, value = get_word(value)
            last_non_ws_was_dot = False
        except errors.HeaderParseError:
            if value[0] not in CFWS_LEADER:
                raise
            token, value = get_cfws(value)
        obs_local_part.append(token)
    if not obs_local_part:
        # The loop consumed nothing; report it as a parse error rather
        # than failing with IndexError on the checks below.
        raise errors.HeaderParseError(
            "expected obs-local-part but found '{}'".format(value))
    # The length guards keep a list holding a single CFWS token from being
    # indexed past its end.
    if (obs_local_part[0].token_type == 'dot' or
            obs_local_part[0].token_type=='cfws' and
            len(obs_local_part) > 1 and
            obs_local_part[1].token_type=='dot'):
        obs_local_part.defects.append(errors.InvalidHeaderDefect(
            "Invalid leading '.' in local part"))
    if (obs_local_part[-1].token_type == 'dot' or
            obs_local_part[-1].token_type=='cfws' and
            len(obs_local_part) > 1 and
            obs_local_part[-2].token_type=='dot'):
        obs_local_part.defects.append(errors.InvalidHeaderDefect(
            "Invalid trailing '.' in local part"))
    if obs_local_part.defects:
        obs_local_part.token_type = 'invalid-obs-local-part'
    return obs_local_part, value
+
def get_dtext(value):
    r""" dtext = <printable ascii except \ [ ]> / obs-dtext
        obs-dtext = obs-NO-WS-CTL / quoted-pair

    We allow anything except the excluded characters, but if we find any
    ASCII other than the RFC defined printable ASCII, a defect is added to
    the token's defects list by _validate_xtext.  Quoted pairs are converted
    to their unquoted values, so what is returned is a ptext token, in this
    case a ValueTerminal.  If there were quoted pairs, an
    ObsoleteHeaderDefect is added to the returned token's defect list.

    Returns (ptext, remaining_value).
    """
    # The docstring is raw because the grammar line contains a backslash.
    ptext, value, had_qp = _get_ptext_to_endchars(value, '[]')
    ptext = ValueTerminal(ptext, 'ptext')
    if had_qp:
        ptext.defects.append(errors.ObsoleteHeaderDefect(
            "quoted printable found in domain-literal"))
    _validate_xtext(ptext)
    return ptext, value
+
def _check_for_early_dl_end(value, domain_literal):
    """Handle running out of input in the middle of a domain-literal.

    If value is non-empty, do nothing and return False.  Otherwise register
    an InvalidHeaderDefect on domain_literal, append the missing closing
    ']' terminal so the token is well formed, and return True.
    """
    if value:
        return False
    # The defect belongs in the defects list; appending it to the token
    # list itself would make it part of the rendered value.
    domain_literal.defects.append(errors.InvalidHeaderDefect(
        "end of input inside domain-literal"))
    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
    return True
+
def get_domain_literal(value):
    """ domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]

    Returns (domain_literal, remaining_value).  If the input ends before the
    closing ']' the early-end helper records a defect and supplies the
    bracket.  Raises HeaderParseError if the input (after optional CFWS) is
    empty or does not start with '[', or if something other than ']'
    follows the dtext.
    """
    domain_literal = DomainLiteral()
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        domain_literal.append(token)
    if not value:
        raise errors.HeaderParseError("expected domain-literal")
    if value[0] != '[':
        raise errors.HeaderParseError("expected '[' at start of domain-literal "
                "but found '{}'".format(value))
    value = value[1:]
    # Record the opening bracket before checking for early end of input so
    # that a truncated literal still renders with its '['.
    domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    if value[0] in WSP:
        token, value = get_fws(value)
        domain_literal.append(token)
    token, value = get_dtext(value)
    domain_literal.append(token)
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    if value[0] in WSP:
        token, value = get_fws(value)
        domain_literal.append(token)
    if _check_for_early_dl_end(value, domain_literal):
        return domain_literal, value
    if value[0] != ']':
        raise errors.HeaderParseError("expected ']' at end of domain-literal "
                "but found '{}'".format(value))
    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        domain_literal.append(token)
    return domain_literal, value
+
def get_domain(value):
    """ domain = dot-atom / domain-literal / obs-domain
        obs-domain = atom *("." atom)

    Returns (domain, remaining_value).  An obs-domain (atoms separated by
    dots with CFWS in between) is accepted with an ObsoleteHeaderDefect.
    Raises HeaderParseError if there is no domain to parse, or if the parsed
    domain is immediately followed by a second '@'.
    """
    domain = Domain()
    leader = None
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        raise errors.HeaderParseError(
            "expected domain but found '{}'".format(value))
    if value[0] == '[':
        token, value = get_domain_literal(value)
        if leader is not None:
            token[:0] = [leader]
        domain.append(token)
        return domain, value
    try:
        token, value = get_dot_atom(value)
    except errors.HeaderParseError:
        token, value = get_atom(value)
    if value and value[0] == '@':
        # Something like "a@b@c": accepting "b" as the domain would silently
        # discard the rest of the address, so reject it instead.
        raise errors.HeaderParseError('Invalid Domain')
    if leader is not None:
        token[:0] = [leader]
    domain.append(token)
    if value and value[0] == '.':
        domain.defects.append(errors.ObsoleteHeaderDefect(
            "domain is not a dot-atom (contains CFWS)"))
        if domain[0].token_type == 'dot-atom':
            # Flatten the dot-atom so its pieces sit alongside the atoms
            # appended below.
            domain[:] = domain[0]
        while value and value[0] == '.':
            domain.append(DOT)
            token, value = get_atom(value[1:])
            domain.append(token)
    return domain, value
+
def get_addr_spec(value):
    """ addr-spec = local-part "@" domain

    Returns (addr_spec, remaining_value).  If the local part is not followed
    by '@', the addr-spec is returned with only the local part and an
    InvalidHeaderDefect.  Errors from get_local_part and get_domain
    propagate.
    """
    addr_spec = AddrSpec()
    token, value = get_local_part(value)
    addr_spec.append(token)
    if not value or value[0] != '@':
        addr_spec.defects.append(errors.InvalidHeaderDefect(
            "addr-spec local part with no domain"))
        return addr_spec, value
    addr_spec.append(ValueTerminal('@', 'address-at-symbol'))
    token, value = get_domain(value[1:])
    addr_spec.append(token)
    return addr_spec, value
+
def get_obs_route(value):
    """ obs-route = obs-domain-list ":"
        obs-domain-list = *(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])

    Returns an obs-route token with the appropriate sub-tokens (that is,
    there is no obs-domain-list in the parse tree), together with the
    remaining value after the terminating ':'.

    Raises HeaderParseError if the first route domain is missing, or if the
    input ends or contains something other than ':' where the route should
    end.
    """
    obs_route = ObsRoute()
    while value and (value[0] == ',' or value[0] in CFWS_LEADER):
        if value[0] in CFWS_LEADER:
            token, value = get_cfws(value)
            obs_route.append(token)
        elif value[0] == ',':
            obs_route.append(ListSeparator)
            value = value[1:]
    if not value or value[0] != '@':
        raise errors.HeaderParseError(
            "expected obs-route domain but found '{}'".format(value))
    obs_route.append(RouteComponentMarker)
    token, value = get_domain(value[1:])
    obs_route.append(token)
    while value and value[0] == ',':
        obs_route.append(ListSeparator)
        value = value[1:]
        if not value:
            break
        if value[0] in CFWS_LEADER:
            token, value = get_cfws(value)
            obs_route.append(token)
        if not value:
            # The CFWS may have been the last thing in the input; let the
            # end-of-header check below report it.
            break
        if value[0] == '@':
            obs_route.append(RouteComponentMarker)
            token, value = get_domain(value[1:])
            obs_route.append(token)
    if not value:
        raise errors.HeaderParseError("end of header while parsing obs-route")
    if value[0] != ':':
        raise errors.HeaderParseError("expected ':' marking end of "
            "obs-route but found '{}'".format(value))
    obs_route.append(ValueTerminal(':', 'end-of-obs-route-marker'))
    return obs_route, value[1:]
+
def get_angle_addr(value):
    """ angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr
        obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]

    Returns (angle_addr, remaining_value).  A null address ('<>') and a
    missing trailing '>' are accepted with an InvalidHeaderDefect; an
    obs-route is accepted with an ObsoleteHeaderDefect.  Raises
    HeaderParseError if the '<' is missing or if neither an addr-spec nor
    an obs-route follows it.
    """
    angle_addr = AngleAddr()
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        angle_addr.append(token)
    if not value or value[0] != '<':
        raise errors.HeaderParseError(
            "expected angle-addr but found '{}'".format(value))
    angle_addr.append(ValueTerminal('<', 'angle-addr-start'))
    value = value[1:]
    if not value:
        # Input ended right after '<'; report it as a parse error rather
        # than failing with an IndexError below.
        raise errors.HeaderParseError(
            "expected addr-spec or obs-route but found '{}'".format(value))
    # Although it is not legal per RFC5322, SMTP uses '<>' in certain
    # circumstances.
    if value[0] == '>':
        angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
        angle_addr.defects.append(errors.InvalidHeaderDefect(
            "null addr-spec in angle-addr"))
        value = value[1:]
        return angle_addr, value
    try:
        token, value = get_addr_spec(value)
    except errors.HeaderParseError:
        try:
            token, value = get_obs_route(value)
            angle_addr.defects.append(errors.ObsoleteHeaderDefect(
                "obsolete route specification in angle-addr"))
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected addr-spec or obs-route but found '{}'".format(value))
        angle_addr.append(token)
        token, value = get_addr_spec(value)
    angle_addr.append(token)
    if value and value[0] == '>':
        value = value[1:]
    else:
        angle_addr.defects.append(errors.InvalidHeaderDefect(
            "missing trailing '>' on angle-addr"))
    # The closing terminal is appended even when it was missing from the
    # input, so the token always renders as a bracketed address.
    angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        angle_addr.append(token)
    return angle_addr, value
+
def get_display_name(value):
    """ display-name = phrase

    display-name is only another name for phrase, so rather than wrapping a
    phrase token inside a display-name token we build a display-name token
    holding the phrase's sub-tokens and a copy of its defects.

    Returns (display_name, remaining_value).
    """
    phrase, value = get_phrase(value)
    display_name = DisplayName()
    display_name.extend(phrase[:])
    display_name.defects = phrase.defects[:]
    return display_name, value
+
+
def get_name_addr(value):
    """ name-addr = [display-name] angle-addr

    Returns (name_addr, remaining_value).  Raises HeaderParseError if value
    is empty, consists only of CFWS, starts with a phrase-ending special
    other than '<', or ends right after the display name.
    """
    name_addr = NameAddr()
    if not value:
        raise errors.HeaderParseError(
            "expected name-addr but found '{}'".format(value))
    # Both the optional display name and the angle-addr can start with cfws.
    leader = None
    if value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
        if not value:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(leader))
    if value[0] != '<':
        if value[0] in PHRASE_ENDS:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(value))
        token, value = get_display_name(value)
        if not value:
            raise errors.HeaderParseError(
                "expected name-addr but found '{}'".format(token))
        if leader is not None:
            # The leading CFWS belongs to the first element of the display
            # name; once attached it must not also go to the angle-addr.
            token[0][:0] = [leader]
            leader = None
        name_addr.append(token)
    token, value = get_angle_addr(value)
    if leader is not None:
        token[:0] = [leader]
    name_addr.append(token)
    return name_addr, value
+
def get_mailbox(value):
    """ mailbox = name-addr / addr-spec

    Returns (mailbox, remaining_value).  The mailbox is retyped as
    'invalid-mailbox' when the parsed content carries any
    InvalidHeaderDefect.  Raises HeaderParseError if value parses as neither
    form.
    """
    # We cannot tell a name-addr from an addr-spec up front, so attempt the
    # name-addr parse and fall back to addr-spec when it fails.
    try:
        parsed, value = get_name_addr(value)
    except errors.HeaderParseError:
        try:
            parsed, value = get_addr_spec(value)
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected mailbox but found '{}'".format(value))
    mailbox = Mailbox()
    for defect in parsed.all_defects:
        if isinstance(defect, errors.InvalidHeaderDefect):
            mailbox.token_type = 'invalid-mailbox'
            break
    mailbox.append(parsed)
    return mailbox, value
+
def get_invalid_mailbox(value, endchars):
    """ Read everything up to one of the chars in endchars.

    This sits outside the formal grammar.  Phrase-ending specials are
    collected one at a time as 'misplaced-special' terminals and everything
    else is collected as phrases, into an InvalidMailbox token list.

    Returns (invalid_mailbox, remaining_value).
    """
    invalid_mailbox = InvalidMailbox()
    while value:
        head = value[0]
        if head in endchars:
            break
        if head in PHRASE_ENDS:
            piece = ValueTerminal(head, 'misplaced-special')
            value = value[1:]
        else:
            piece, value = get_phrase(value)
        invalid_mailbox.append(piece)
    return invalid_mailbox, value
+
def get_mailbox_list(value):
    """ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
        obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])

    We deliberately step outside the formal grammar to get better error
    recovery: the list ends only at the end of the value or at a ';' (the
    group terminator).  Anything that does not parse as a mailbox becomes
    an InvalidMailbox token and parsing carries on with the next entry.
    Every entry may be empty; higher levels deal with that case.

    Returns (mailbox_list, remaining_value).
    """
    mailbox_list = MailboxList()
    while value and value[0] != ';':
        try:
            entry, value = get_mailbox(value)
        except errors.HeaderParseError:
            leader = None
            if value[0] in CFWS_LEADER:
                leader, value = get_cfws(value)
            if leader is not None and (not value or value[0] in ',;'):
                # Nothing but CFWS between separators.
                mailbox_list.append(leader)
                mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
                    "empty element in mailbox-list"))
            elif leader is None and value[0] == ',':
                # Completely empty element.
                mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
                    "empty element in mailbox-list"))
            else:
                entry, value = get_invalid_mailbox(value, ',;')
                if leader is not None:
                    entry[:0] = [leader]
                mailbox_list.append(entry)
                mailbox_list.defects.append(errors.InvalidHeaderDefect(
                    "invalid mailbox in mailbox-list"))
        else:
            mailbox_list.append(entry)
        if value and value[0] not in ',;':
            # Junk follows the entry: fold it into the last entry and mark
            # that entry invalid.  Its mailbox info remains available.
            last = mailbox_list[-1]
            last.token_type = 'invalid-mailbox'
            junk, value = get_invalid_mailbox(value, ',;')
            last.extend(junk)
            mailbox_list.defects.append(errors.InvalidHeaderDefect(
                "invalid mailbox in mailbox-list"))
        if value and value[0] == ',':
            mailbox_list.append(ListSeparator)
            value = value[1:]
    return mailbox_list, value
+
+
def get_group_list(value):
    """ group-list = mailbox-list / CFWS / obs-group-list
        obs-group-list = 1*([CFWS] ",") [CFWS]

    Returns (group_list, remaining_value).  Parsing stops in front of the
    ';' group terminator, which is left for the caller.  An empty value, or
    one that ends after leading CFWS, yields a group-list carrying an
    InvalidHeaderDefect; a list consisting only of empty entries yields an
    ObsoleteHeaderDefect.
    """
    group_list = GroupList()
    if not value:
        group_list.defects.append(errors.InvalidHeaderDefect(
            "end of header before group-list"))
        return group_list, value
    leader = None
    if value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
        if not value:
            # This should never happen in email parsing, since CFWS-only is a
            # legal alternative to group-list in a group, which is the only
            # place group-list appears.
            group_list.defects.append(errors.InvalidHeaderDefect(
                "end of header in group-list"))
            group_list.append(leader)
            return group_list, value
    if value[0] == ';':
        # Only append real CFWS; when the value started directly with ';'
        # there is no leader and None must not end up in the token list.
        if leader is not None:
            group_list.append(leader)
        return group_list, value
    token, value = get_mailbox_list(value)
    if len(token.all_mailboxes) == 0:
        # No mailboxes at all: splice the list's contents in directly
        # instead of nesting an empty mailbox-list.
        if leader is not None:
            group_list.append(leader)
        group_list.extend(token)
        group_list.defects.append(errors.ObsoleteHeaderDefect(
            "group-list with empty entries"))
        return group_list, value
    if leader is not None:
        token[:0] = [leader]
    group_list.append(token)
    return group_list, value
+
def get_group(value):
    """ group = display-name ":" [group-list] ";" [CFWS]

    Returns (group, remaining_value).  If the header ends before the
    terminating ';' an InvalidHeaderDefect is recorded and the terminator is
    supplied.  Raises HeaderParseError if the display name is not followed
    by ':' or if something other than ';' follows the group-list.
    """
    group = Group()
    token, value = get_display_name(value)
    if not value or value[0] != ':':
        raise errors.HeaderParseError("expected ':' at end of group "
            "display name but found '{}'".format(value))
    group.append(token)
    group.append(ValueTerminal(':', 'group-display-name-terminator'))
    value = value[1:]
    if value and value[0] == ';':
        group.append(ValueTerminal(';', 'group-terminator'))
        return group, value[1:]
    token, value = get_group_list(value)
    group.append(token)
    if not value:
        group.defects.append(errors.InvalidHeaderDefect(
            "end of header in group"))
    elif value[0] != ';':
        # 'elif' matters: with an empty value there is no value[0] to test.
        raise errors.HeaderParseError(
            "expected ';' at end of group but found {}".format(value))
    group.append(ValueTerminal(';', 'group-terminator'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        group.append(token)
    return group, value
+
def get_address(value):
    """ address = mailbox / group

    Perhaps surprisingly, an address may be a single address or a whole
    list of addresses (a group).  That is why the returned Address object
    has a 'mailboxes' attribute that treats a single address as a list of
    length one.  To tell the two cases apart, look at the single element of
    the result, which is either a mailbox or a group token.

    Returns (address, remaining_value).  Raises HeaderParseError if value
    parses as neither a group nor a mailbox.
    """
    # The formal grammar is of little help here: mailbox and group look
    # alike (especially with obsolete forms allowed) until an '@', '<' or
    # ':' is reached.  So we just try group first and then mailbox.  Parsing
    # a phrase and branching on the following character might be more
    # efficient, but that would be a premature optimization.
    try:
        parsed, value = get_group(value)
    except errors.HeaderParseError:
        try:
            parsed, value = get_mailbox(value)
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected address but found '{}'".format(value))
    address = Address()
    address.append(parsed)
    return address, value
+
def get_address_list(value):
    """ address_list = (address *("," address)) / obs-addr-list
        obs-addr-list = *([CFWS] ",") address *("," [address / CFWS])

    Unlike the formal grammar, this keeps going until the input is used up,
    on the assumption that the whole input is an address-list.  That holds
    in email parsing, and it lets us step over invalid addresses and still
    pick up the valid ones that follow.

    Returns (address_list, remaining_value).
    """
    address_list = AddressList()
    while value:
        try:
            entry, value = get_address(value)
        except errors.HeaderParseError:
            leader = None
            if value[0] in CFWS_LEADER:
                leader, value = get_cfws(value)
            if leader is not None and (not value or value[0] == ','):
                # Nothing but CFWS between separators.
                address_list.append(leader)
                address_list.defects.append(errors.ObsoleteHeaderDefect(
                    "address-list entry with no content"))
            elif leader is None and value[0] == ',':
                # Completely empty element.
                address_list.defects.append(errors.ObsoleteHeaderDefect(
                    "empty element in address-list"))
            else:
                entry, value = get_invalid_mailbox(value, ',')
                if leader is not None:
                    entry[:0] = [leader]
                address_list.append(Address([entry]))
                address_list.defects.append(errors.InvalidHeaderDefect(
                    "invalid address in address-list"))
        else:
            address_list.append(entry)
        if value and value[0] != ',':
            # Junk follows the address: fold it into the address's first
            # element and mark that element as an invalid mailbox.  The
            # mailbox info remains available.
            first = address_list[-1][0]
            first.token_type = 'invalid-mailbox'
            junk, value = get_invalid_mailbox(value, ',')
            first.extend(junk)
            address_list.defects.append(errors.InvalidHeaderDefect(
                "invalid address in address-list"))
        if value:
            # Whatever is left must start with a comma here.
            address_list.append(ValueTerminal(',', 'list-separator'))
            value = value[1:]
    return address_list, value
+
+#
+# XXX: As I begin to add additional header parsers, I'm realizing we probably
+# have two levels of parser routines: the get_XXX methods that get a token in
+# the grammar, and the parse_XXX methods that parse an entire field value. So
+# get_address_list above should really be a parse_ method, as should
+# get_unstructured, probably.
+#
+
def parse_mime_version(value):
    """ mime-version = [CFWS] 1*digit [CFWS] "." [CFWS] 1*digit [CFWS]

    Parse a whole MIME-Version value and return the MIMEVersion token list
    (no remainder is returned).  The major and minor attributes are set
    when the corresponding numbers are all digits; anything unexpected is
    kept as 'xtext' terminals and recorded as defects.
    """
    # The [CFWS] is implicit in the RFC 2045 BNF.
    # XXX: This routine is a bit verbose, should factor out a get_int method.
    mime_version = MIMEVersion()
    if not value:
        mime_version.defects.append(errors.HeaderMissingRequiredValue(
            "Missing MIME version number (eg: 1.0)"))
        return mime_version
    if value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        mime_version.append(cfws)
        if not value:
            mime_version.defects.append(errors.HeaderMissingRequiredValue(
                "Expected MIME version number but found only CFWS"))

    # Major number: everything up to the '.' or the next CFWS.
    stop = 0
    while (stop < len(value) and value[stop] != '.'
            and value[stop] not in CFWS_LEADER):
        stop += 1
    major_text, value = value[:stop], value[stop:]
    if major_text.isdigit():
        mime_version.major = int(major_text)
        mime_version.append(ValueTerminal(major_text, 'digits'))
    else:
        mime_version.defects.append(errors.InvalidHeaderDefect(
            "Expected MIME major version number but found {!r}".format(major_text)))
        mime_version.append(ValueTerminal(major_text, 'xtext'))
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        mime_version.append(cfws)

    # Separator.
    if not value or value[0] != '.':
        if mime_version.major is not None:
            mime_version.defects.append(errors.InvalidHeaderDefect(
                "Incomplete MIME version; found only major number"))
        if value:
            mime_version.append(ValueTerminal(value, 'xtext'))
        return mime_version
    mime_version.append(ValueTerminal('.', 'version-separator'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        mime_version.append(cfws)
    if not value:
        if mime_version.major is not None:
            mime_version.defects.append(errors.InvalidHeaderDefect(
                "Incomplete MIME version; found only major number"))
        return mime_version

    # Minor number: everything up to the next CFWS.
    stop = 0
    while stop < len(value) and value[stop] not in CFWS_LEADER:
        stop += 1
    minor_text, value = value[:stop], value[stop:]
    if minor_text.isdigit():
        mime_version.minor = int(minor_text)
        mime_version.append(ValueTerminal(minor_text, 'digits'))
    else:
        mime_version.defects.append(errors.InvalidHeaderDefect(
            "Expected MIME minor version number but found {!r}".format(minor_text)))
        mime_version.append(ValueTerminal(minor_text, 'xtext'))
    if value and value[0] in CFWS_LEADER:
        cfws, value = get_cfws(value)
        mime_version.append(cfws)
    if value:
        mime_version.defects.append(errors.InvalidHeaderDefect(
            "Excess non-CFWS text after MIME version"))
        mime_version.append(ValueTerminal(value, 'xtext'))
    return mime_version
+
def get_invalid_parameter(value):
    """ Read everything up to the next ';'.

    This sits outside the formal grammar.  Phrase-ending specials are
    collected one at a time as 'misplaced-special' terminals and everything
    else is collected as phrases, into an InvalidParameter token list.

    Returns (invalid_parameter, remaining_value).
    """
    invalid_parameter = InvalidParameter()
    while value:
        head = value[0]
        if head == ';':
            break
        if head in PHRASE_ENDS:
            piece = ValueTerminal(head, 'misplaced-special')
            value = value[1:]
        else:
            piece, value = get_phrase(value)
        invalid_parameter.append(piece)
    return invalid_parameter, value
+
def get_ttext(value):
    """ttext = <matches _ttext_matcher>

    Any run of non-TOKEN_ENDS characters is accepted as ttext; characters
    that are not really valid are reported through defects on the returned
    token by _validate_xtext.  Defects are registered for *any*
    non-printables even though the RFC doesn't exclude all of them, in the
    spirit of RFC 5322.

    Returns (ttext, remaining_value).  Raises HeaderParseError when nothing
    matches at the start of value.
    """
    match = _non_token_end_matcher(value)
    if not match:
        raise errors.HeaderParseError(
            "expected ttext but found '{}'".format(value))
    text = match.group()
    remainder = value[len(text):]
    ttext = ValueTerminal(text, 'ttext')
    _validate_xtext(ttext)
    return ttext, remainder
+
def get_token(value):
    """token = [CFWS] 1*ttext [CFWS]

    In RFC terms ttext is any US-ASCII character except space, ctls, or
    tspecials.  Tabs are excluded here as well, although the RFC does not
    exclude them.

    The surrounding CFWS is implied by the RFC rather than spelled out in
    its BNF.

    Returns (token, remaining_value).  Raises HeaderParseError if, after
    optional CFWS, value starts with a TOKEN_ENDS character.
    """
    mtoken = Token()
    if value and value[0] in CFWS_LEADER:
        leading, value = get_cfws(value)
        mtoken.append(leading)
    if value and value[0] in TOKEN_ENDS:
        raise errors.HeaderParseError(
            "expected token but found '{}'".format(value))
    text, value = get_ttext(value)
    mtoken.append(text)
    if value and value[0] in CFWS_LEADER:
        trailing, value = get_cfws(value)
        mtoken.append(trailing)
    return mtoken, value
+
def get_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character)

    Any run of non-ATTRIBUTE_ENDS characters is accepted as attrtext;
    characters that are not really valid are reported through defects on
    the returned token by _validate_xtext.  Defects are registered for
    *any* non-printables even though the RFC doesn't exclude all of them,
    in the spirit of RFC 5322.

    Returns (attrtext, remaining_value).  Raises HeaderParseError when
    nothing matches at the start of value.
    """
    match = _non_attribute_end_matcher(value)
    if not match:
        raise errors.HeaderParseError(
            "expected attrtext but found {!r}".format(value))
    text = match.group()
    remainder = value[len(text):]
    attrtext = ValueTerminal(text, 'attrtext')
    _validate_xtext(attrtext)
    return attrtext, remainder
+
def get_attribute(value):
    """ [CFWS] 1*attrtext [CFWS]

    Here the CFWS is written into the BNF explicitly, and as elsewhere the
    run of characters itself becomes a value terminal.  In RFC terms
    attrtext is the token characters minus '*', "'", and '%'.  Tab is
    excluded too, as it is for token.

    Returns (attribute, remaining_value).  Raises HeaderParseError if, after
    optional CFWS, value starts with an ATTRIBUTE_ENDS character.
    """
    attribute = Attribute()
    if value and value[0] in CFWS_LEADER:
        leading, value = get_cfws(value)
        attribute.append(leading)
    if value and value[0] in ATTRIBUTE_ENDS:
        raise errors.HeaderParseError(
            "expected token but found '{}'".format(value))
    text, value = get_attrtext(value)
    attribute.append(text)
    if value and value[0] in CFWS_LEADER:
        trailing, value = get_cfws(value)
        attribute.append(trailing)
    return attribute, value
+
def get_extended_attrtext(value):
    """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')

    A dedicated routine so that a value containing % escapes comes back as
    one string, which is decoded as a single string later.

    Returns (attrtext, remaining_value).  Raises HeaderParseError when
    nothing matches at the start of value.
    """
    match = _non_extended_attribute_end_matcher(value)
    if not match:
        raise errors.HeaderParseError(
            "expected extended attrtext but found {!r}".format(value))
    text = match.group()
    remainder = value[len(text):]
    attrtext = ValueTerminal(text, 'extended-attrtext')
    _validate_xtext(attrtext)
    return attrtext, remainder
+
def get_extended_attribute(value):
    """ [CFWS] 1*extended_attrtext [CFWS]

    Same as the non-extended version, except that % characters are allowed
    so that an encoded value is picked up as a single string.

    Returns (attribute, remaining_value).  Raises HeaderParseError if, after
    optional CFWS, value starts with an EXTENDED_ATTRIBUTE_ENDS character.
    """
    # XXX: should we have an ExtendedAttribute TokenList?
    attribute = Attribute()
    if value and value[0] in CFWS_LEADER:
        leading, value = get_cfws(value)
        attribute.append(leading)
    if value and value[0] in EXTENDED_ATTRIBUTE_ENDS:
        raise errors.HeaderParseError(
            "expected token but found '{}'".format(value))
    text, value = get_extended_attrtext(value)
    attribute.append(text)
    if value and value[0] in CFWS_LEADER:
        trailing, value = get_cfws(value)
        attribute.append(trailing)
    return attribute, value
+
def get_section(value):
    """ '*' digits

    The formal BNF is more complicated because leading 0s are not allowed.
    We check for that and add a defect.  We also assume no CFWS is allowed
    between the '*' and the digits, though the RFC is not crystal clear on
    that.  The caller should already have dealt with leading CFWS.

    Returns (section, remaining_value) with section.number set to the
    parsed integer.  Raises HeaderParseError if value does not start with
    '*' followed by at least one digit.
    """
    section = Section()
    if not value or value[0] != '*':
        raise errors.HeaderParseError("Expected section but found {}".format(
                                        value))
    section.append(ValueTerminal('*', 'section-marker'))
    value = value[1:]
    if not value or not value[0].isdigit():
        raise errors.HeaderParseError("Expected section number but "
                                      "found {}".format(value))
    digits = ''
    while value and value[0].isdigit():
        digits += value[0]
        value = value[1:]
    if digits[0] == '0' and digits != '0':
        # A leading zero is a defect, not a parse failure: the number is
        # still usable.
        section.defects.append(errors.InvalidHeaderDefect(
            "section number has an invalid leading 0"))
    section.number = int(digits)
    section.append(ValueTerminal(digits, 'digits'))
    return section, value
+
+
def get_value(value):
    """ quoted-string / attribute

    Returns (value_token, remaining_value).  Leading CFWS is attached to
    the front of the parsed quoted-string or extended attribute.  Raises
    HeaderParseError if value is empty or contains only CFWS.
    """
    if not value:
        raise errors.HeaderParseError("Expected value but found end of string")
    leader = None
    if value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        raise errors.HeaderParseError("Expected value but found "
                                      "only {}".format(leader))
    parse = get_quoted_string if value[0] == '"' else get_extended_attribute
    content, value = parse(value)
    if leader is not None:
        content[:0] = [leader]
    v = Value()
    v.append(content)
    return v, value
+
def get_parameter(value):
    """ attribute [section] ["*"] [CFWS] "=" value

    The CFWS is implied by the RFC but not made explicit in the BNF.  This
    simplified form of the BNF from the RFC is made to conform with the RFC BNF
    through some extra checks.  We do it this way because it makes both error
    recovery and working with the resulting parse tree easier.

    Returns (param, remaining_value).  A parameter with a name but no value
    is returned with an InvalidHeaderDefect.  Raises HeaderParseError if the
    parameter is incomplete, is not followed by '=', or has malformed
    RFC 2231 charset/language delimiters.
    """
    # It is possible CFWS would also be implicitly allowed between the section
    # and the 'extended-attribute' marker (the '*') , but we've never seen that
    # in the wild and we will therefore ignore the possibility.
    param = Parameter()
    token, value = get_attribute(value)
    param.append(token)
    if not value or value[0] == ';':
        param.defects.append(errors.InvalidHeaderDefect("Parameter contains "
            "name ({}) but no value".format(token)))
        return param, value
    if value[0] == '*':
        try:
            token, value = get_section(value)
            param.sectioned = True
            param.append(token)
        except errors.HeaderParseError:
            # Not a section number; the '*' may be the extended marker.
            pass
        if not value:
            raise errors.HeaderParseError("Incomplete parameter")
        if value[0] == '*':
            param.append(ValueTerminal('*', 'extended-parameter-marker'))
            value = value[1:]
            param.extended = True
    # value may have been exhausted by the extended marker above, so check
    # for emptiness before indexing.
    if not value or value[0] != '=':
        raise errors.HeaderParseError("Parameter not followed by '='")
    param.append(ValueTerminal('=', 'parameter-separator'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        param.append(token)
    # remainder is the text following a quoted extended value; appendto is
    # where value tokens go (the param itself, or the emptied
    # bare-quoted-string inside the quoted string).
    remainder = None
    appendto = param
    if param.extended and value and value[0] == '"':
        # Now for some serious hackery to handle the common invalid case of
        # double quotes around an extended value.  We also accept (with defect)
        # a value marked as encoded that isn't really.
        qstring, remainder = get_quoted_string(value)
        inner_value = qstring.stripped_value
        semi_valid = False
        if param.section_number == 0:
            if inner_value and inner_value[0] == "'":
                semi_valid = True
            else:
                token, rest = get_attrtext(inner_value)
                if rest and rest[0] == "'":
                    semi_valid = True
        else:
            try:
                token, rest = get_extended_attrtext(inner_value)
            except errors.HeaderParseError:
                # Not extended attrtext; leave semi_valid False.
                pass
            else:
                if not rest:
                    semi_valid = True
        if semi_valid:
            param.defects.append(errors.InvalidHeaderDefect(
                "Quoted string value for extended parameter is invalid"))
            param.append(qstring)
            for t in qstring:
                if t.token_type == 'bare-quoted-string':
                    # Empty it; its content is re-parsed from inner_value
                    # and appended here instead.
                    t[:] = []
                    appendto = t
                    break
            value = inner_value
        else:
            remainder = None
            param.defects.append(errors.InvalidHeaderDefect(
                "Parameter marked as extended but appears to have a "
                "quoted string value that is non-encoded"))
    if value and value[0] == "'":
        token = None
    else:
        token, value = get_value(value)
    if not param.extended or param.section_number > 0:
        if not value or value[0] != "'":
            appendto.append(token)
            if remainder is not None:
                assert not value, value
                value = remainder
            return param, value
        param.defects.append(errors.InvalidHeaderDefect(
            "Apparent initial-extended-value but attribute "
            "was not marked as extended or was not initial section"))
    if not value:
        # Assume the charset/lang is missing and the token is the value.
        param.defects.append(errors.InvalidHeaderDefect(
            "Missing required charset/lang delimiters"))
        appendto.append(token)
        if remainder is None:
            return param, value
    else:
        if token is not None:
            for t in token:
                if t.token_type == 'extended-attrtext':
                    break
            # NOTE(review): the original had the no-op comparison
            # `t.token_type == 'attrtext'` here, presumably meant as an
            # assignment.  It is left out rather than turned into an
            # assignment because that would retag whatever sub-token the
            # loop ended on; confirm the intent before changing it.
            appendto.append(t)
            param.charset = t.value
        if value[0] != "'":
            raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
                                          "delimiter, but found {!r}".format(value))
        appendto.append(ValueTerminal("'", 'RFC2231 delimiter'))
        value = value[1:]
        if value and value[0] != "'":
            token, value = get_attrtext(value)
            appendto.append(token)
            param.lang = token.value
            if not value or value[0] != "'":
                raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
                                  "delimiter, but found {}".format(value))
        appendto.append(ValueTerminal("'", 'RFC2231 delimiter'))
        value = value[1:]
    if remainder is not None:
        # Treat the rest of value as bare quoted string content.
        v = Value()
        while value:
            if value[0] in WSP:
                token, value = get_fws(value)
            else:
                token, value = get_qcontent(value)
            v.append(token)
        token = v
    else:
        token, value = get_value(value)
    appendto.append(token)
    if remainder is not None:
        assert not value, value
        value = remainder
    return param, value
+
def parse_mime_parameters(value):
    """ parameter *( ";" parameter )

    The BNF signals that this routine is to be called only once the leading
    ';' has been found and handled.  The formal RFC grammar has no such
    rule, but treating the set of parameters as its own TokenList is more
    convenient for us.

    This is a 'parse' routine because it consumes the remaining value, but
    it is never called on a full header: it parses whatever follows the
    non-parameter value of a specific MIME header.

    Returns the MimeParameters token list (no remainder is returned).
    """
    mime_parameters = MimeParameters()
    while value:
        try:
            parsed, value = get_parameter(value)
        except errors.HeaderParseError:
            leader = None
            if value[0] in CFWS_LEADER:
                leader, value = get_cfws(value)
            if not value:
                mime_parameters.append(leader)
                return mime_parameters
            if value[0] != ';':
                parsed, value = get_invalid_parameter(value)
                if leader:
                    parsed[:0] = [leader]
                mime_parameters.append(parsed)
                mime_parameters.defects.append(errors.InvalidHeaderDefect(
                    "invalid parameter {!r}".format(parsed)))
            else:
                if leader is not None:
                    mime_parameters.append(leader)
                mime_parameters.defects.append(errors.InvalidHeaderDefect(
                    "parameter entry with no content"))
        else:
            mime_parameters.append(parsed)
        if value and value[0] != ';':
            # Trailing junk after an otherwise valid parameter: fold it
            # into that parameter and mark it invalid.  It keeps its value.
            last = mime_parameters[-1]
            last.token_type = 'invalid-parameter'
            junk, value = get_invalid_parameter(value)
            last.extend(junk)
            mime_parameters.defects.append(errors.InvalidHeaderDefect(
                "parameter with invalid trailing text {!r}".format(junk)))
        if value:
            # Whatever is left must start with a ';' here.
            mime_parameters.append(ValueTerminal(';', 'parameter-separator'))
            value = value[1:]
    return mime_parameters
+
def _find_mime_parameters(tokenlist, value):
    """Salvage whatever parameters can be found in an invalid MIME header.

    Text in front of the first ';' is appended to tokenlist: characters in
    PHRASE_ENDS become 'misplaced-special' terminals, anything else is
    consumed with get_phrase.  If a ';' is reached, the separator and the
    parsed parameters that follow it are appended as well.  tokenlist is
    modified in place and nothing is returned.

    """
    while value:
        lead = value[0]
        if lead == ';':
            break
        if lead in PHRASE_ENDS:
            tokenlist.append(ValueTerminal(lead, 'misplaced-special'))
            value = value[1:]
            continue
        phrase, value = get_phrase(value)
        tokenlist.append(phrase)
    if value:
        # The loop stopped on a ';', so a parameter list follows.
        tokenlist.append(ValueTerminal(';', 'parameter-separator'))
        tokenlist.append(parse_mime_parameters(value[1:]))
+
def parse_content_type_header(value):
    """ maintype "/" subtype *( ";" parameter )

    The maintype and subtype are tokens.  Theoretically they could
    be checked against the official IANA list + x-token, but we
    don't do that.

    Returns a ContentType token list.  Syntax problems are reported by
    appending defects to the returned object rather than by raising; in
    those cases the remaining text is handed to _find_mime_parameters so
    that any parameters can still be recovered.
    """
    ctype = ContentType()
    if not value:
        ctype.defects.append(errors.HeaderMissingRequiredValue(
            "Missing content type specification"))
        return ctype
    try:
        token, value = get_token(value)
    except errors.HeaderParseError:
        ctype.defects.append(errors.InvalidHeaderDefect(
            "Expected content maintype but found {!r}".format(value)))
        _find_mime_parameters(ctype, value)
        return ctype
    ctype.append(token)
    # XXX: If we really want to follow the formal grammar we should make
    # maintype and subtype specialized TokenLists here.  Probably not worth it.
    if not value or value[0] != '/':
        ctype.defects.append(errors.InvalidHeaderDefect(
            "Invalid content type"))
        if value:
            _find_mime_parameters(ctype, value)
        return ctype
    ctype.maintype = token.value.strip().lower()
    ctype.append(ValueTerminal('/', 'content-type-separator'))
    value = value[1:]
    try:
        token, value = get_token(value)
    except errors.HeaderParseError:
        ctype.defects.append(errors.InvalidHeaderDefect(
            "Expected content subtype but found {!r}".format(value)))
        _find_mime_parameters(ctype, value)
        return ctype
    ctype.append(token)
    ctype.subtype = token.value.strip().lower()
    if not value:
        return ctype
    if value[0] != ';':
        ctype.defects.append(errors.InvalidHeaderDefect(
            "Only parameters are valid after content type, but "
            "found {!r}".format(value)))
        # The RFC requires that a syntactically invalid content-type be treated
        # as text/plain.  Perhaps we should postel this, but we should probably
        # only do that if we were checking the subtype value against IANA.
        # Deleting the instance attributes drops the values set above.
        del ctype.maintype, ctype.subtype
        _find_mime_parameters(ctype, value)
        return ctype
    ctype.append(ValueTerminal(';', 'parameter-separator'))
    ctype.append(parse_mime_parameters(value[1:]))
    return ctype
+
def parse_content_disposition_header(value):
    """ disposition-type *( ";" parameter )

    Returns a ContentDisposition token list.  Syntax problems are recorded
    as defects on the returned object instead of being raised, and the
    remaining text is passed to _find_mime_parameters so that parameters
    can still be recovered.

    """
    disp_header = ContentDisposition()
    if not value:
        disp_header.defects.append(errors.HeaderMissingRequiredValue(
            "Missing content disposition"))
        return disp_header
    try:
        token, value = get_token(value)
    except errors.HeaderParseError:
        # Record the defect on the header being built (the original code
        # referenced an undefined 'ctype' name here and raised NameError).
        disp_header.defects.append(errors.InvalidHeaderDefect(
            "Expected content disposition but found {!r}".format(value)))
        _find_mime_parameters(disp_header, value)
        return disp_header
    disp_header.append(token)
    disp_header.content_disposition = token.value.strip().lower()
    if not value:
        return disp_header
    if value[0] != ';':
        disp_header.defects.append(errors.InvalidHeaderDefect(
            "Only parameters are valid after content disposition, but "
            "found {!r}".format(value)))
        _find_mime_parameters(disp_header, value)
        return disp_header
    disp_header.append(ValueTerminal(';', 'parameter-separator'))
    disp_header.append(parse_mime_parameters(value[1:]))
    return disp_header
+
def parse_content_transfer_encoding_header(value):
    """ mechanism

    Returns a ContentTransferEncoding token list.  The first token, when
    one can be parsed, is stored lower-cased in the 'cte' attribute.  Any
    text after it is kept in the token list and reported with one
    InvalidHeaderDefect per extra element.

    """
    # We should probably validate the values, since the list is fixed.
    cte_header = ContentTransferEncoding()
    if not value:
        cte_header.defects.append(errors.HeaderMissingRequiredValue(
            "Missing content transfer encoding"))
        return cte_header
    try:
        token, value = get_token(value)
    except errors.HeaderParseError:
        # Record the defect on the header being built (the original code
        # referenced an undefined 'ctype' name here and raised NameError).
        # 'value' is untouched, so the loop below consumes it as extra text.
        cte_header.defects.append(errors.InvalidHeaderDefect(
            "Expected content transfer encoding but found {!r}".format(value)))
    else:
        cte_header.append(token)
        cte_header.cte = token.value.strip().lower()
    if not value:
        return cte_header
    while value:
        cte_header.defects.append(errors.InvalidHeaderDefect(
            "Extra text after content transfer encoding"))
        if value[0] in PHRASE_ENDS:
            cte_header.append(ValueTerminal(value[0], 'misplaced-special'))
            value = value[1:]
        else:
            token, value = get_phrase(value)
            cte_header.append(token)
    return cte_header
diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py
index 79573c6..cdfa372 100644
--- a/Lib/email/_parseaddr.py
+++ b/Lib/email/_parseaddr.py
@@ -47,6 +47,25 @@ def parsedate_tz(data):
Accounts for military timezones.
"""
+ res = _parsedate_tz(data)
+ if not res:
+ return
+ if res[9] is None:
+ res[9] = 0
+ return tuple(res)
+
+def _parsedate_tz(data):
+ """Convert date to extended time tuple.
+
+ The last (additional) element is the time zone offset in seconds, except if
+ the timezone was specified as -0000. In that case the last element is
+ None. This indicates a UTC timestamp that explicitly declaims knowledge of
+ the source timezone, as opposed to a +0000 timestamp that indicates the
+ source timezone really was UTC.
+
+ """
+ if not data:
+ return
data = data.split()
# The FWS after the comma after the day-of-week is optional, so search and
# adjust for this.
@@ -99,6 +118,14 @@ def parsedate_tz(data):
tss = '0'
elif len(tm) == 3:
[thh, tmm, tss] = tm
+ elif len(tm) == 1 and '.' in tm[0]:
+ # Some non-compliant MUAs use '.' to separate time elements.
+ tm = tm[0].split('.')
+ if len(tm) == 2:
+ [thh, tmm] = tm
+ tss = 0
+ elif len(tm) == 3:
+ [thh, tmm, tss] = tm
else:
return None
try:
@@ -130,6 +157,8 @@ def parsedate_tz(data):
tzoffset = int(tz)
except ValueError:
pass
+ if tzoffset==0 and tz.startswith('-'):
+ tzoffset = None
# Convert a timezone offset into seconds ; -0500 -> -18000
if tzoffset:
if tzoffset < 0:
@@ -139,7 +168,7 @@ def parsedate_tz(data):
tzsign = 1
tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
# Daylight Saving Time flag is set to -1, since DST is unknown.
- return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset
+ return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
def parsedate(data):
diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py
new file mode 100644
index 0000000..8106114
--- /dev/null
+++ b/Lib/email/_policybase.py
@@ -0,0 +1,358 @@
+"""Policy framework for the email package.
+
+Allows fine grained feature control of how the package parses and emits data.
+"""
+
+import abc
+from email import header
+from email import charset as _charset
+from email.utils import _has_surrogates
+
+__all__ = [
+ 'Policy',
+ 'Compat32',
+ 'compat32',
+ ]
+
+
class _PolicyBase:

    """Policy Object basic framework.

    This class is useless unless subclassed.  A subclass should define
    class attributes with defaults for any values that are to be
    managed by the Policy object.  The constructor will then allow
    non-default values to be set for these attributes at instance
    creation time.  The clone method takes these same attributes as
    keyword arguments and returns a new instance identical to the
    original except for those values changed by the keyword
    arguments.  Instances may be added, yielding new instances with
    any non-default values from the right hand operand overriding
    those in the left hand operand.  That is,

        A + B == A.clone(<non-default values of B>)

    Instances are read-only: ordinary attribute assignment raises
    AttributeError.

    The repr of an instance can be used to reconstruct the object
    if and only if the repr of the values can be used to reconstruct
    those values.

    """

    def __init__(self, **kw):
        """Create new Policy, possibly overriding some defaults.

        Each keyword must name an attribute that already exists on the
        class; anything else raises TypeError.

        """
        for name, value in kw.items():
            if not hasattr(self, name):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        name, self.__class__.__name__))
            # Bypass our own read-only __setattr__.
            super().__setattr__(name, value)

    def __repr__(self):
        # Only the explicitly overridden values live in the instance dict.
        args = [ "{}={!r}".format(name, value)
                 for name, value in self.__dict__.items() ]
        return "{}({})".format(self.__class__.__name__, ', '.join(args))

    def clone(self, **kw):
        """Return a new instance with specified attributes changed.

        The new instance has the same attribute values as the current object,
        except for the changes passed in as keyword arguments.  A keyword
        that does not name an existing attribute raises TypeError.

        """
        # __new__ is used so that subclass constructors are not re-run.
        newpolicy = self.__class__.__new__(self.__class__)
        for attr, value in self.__dict__.items():
            object.__setattr__(newpolicy, attr, value)
        for attr, value in kw.items():
            if not hasattr(self, attr):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        attr, self.__class__.__name__))
            object.__setattr__(newpolicy, attr, value)
        return newpolicy

    def __setattr__(self, name, value):
        # Always refuse; the message distinguishes known from unknown names.
        if hasattr(self, name):
            msg = "{!r} object attribute {!r} is read-only"
        else:
            msg = "{!r} object has no attribute {!r}"
        raise AttributeError(msg.format(self.__class__.__name__, name))

    def __add__(self, other):
        """Non-default values from right operand override those from left.

        The object returned is a new instance of the subclass.

        """
        return self.clone(**other.__dict__)
+
+
def _append_doc(doc, added_doc):
    """Return doc extended with the body of a '+'-style docstring.

    The last line of doc (normally the indentation in front of the closing
    quotes) and the first line of added_doc (the '+' marker) are dropped,
    and the two remainders are joined with a newline.
    """
    doc = doc.rsplit('\n', 1)[0]
    # partition() yields '' when added_doc is just the marker with no
    # newline, where split('\n', 1)[1] would raise IndexError.
    added_doc = added_doc.partition('\n')[2]
    return doc + '\n' + added_doc
+
def _extend_docstrings(cls):
    """Class decorator that expands docstrings starting with '+'.

    If the class docstring starts with '+', it is replaced by the docstring
    of the first base class with the remainder appended.  For every entry in
    the class namespace whose docstring starts with '+', the classes in the
    MRO of each base are searched in order for an attribute of the same name
    that has a docstring, and the first one found is used as the prefix.

    Returns cls so that it can be used as a decorator.
    """
    missing = object()
    if cls.__doc__ and cls.__doc__.startswith('+'):
        cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
    for name, attr in cls.__dict__.items():
        if attr.__doc__ and attr.__doc__.startswith('+'):
            for c in (c for base in cls.__bases__ for c in base.mro()):
                # Classes earlier in the MRO (a mixin, or object) may not
                # define this attribute at all; skip them instead of
                # failing with AttributeError.
                inherited = getattr(c, name, missing)
                if inherited is missing:
                    continue
                doc = getattr(inherited, '__doc__')
                if doc:
                    attr.__doc__ = _append_doc(doc, attr.__doc__)
                    break
    return cls
+
+
class Policy(_PolicyBase, metaclass=abc.ABCMeta):

    r"""Controls for how messages are interpreted and formatted.

    Most of the classes and many of the methods in the email package accept
    Policy objects as parameters.  A Policy object contains a set of values and
    functions that control how input is interpreted and how output is rendered.
    For example, the parameter 'raise_on_defect' controls whether or not an RFC
    violation results in an error being raised or not, while 'max_line_length'
    controls the maximum length of output lines when a Message is serialized.

    Any valid attribute may be overridden when a Policy is created by passing
    it as a keyword argument to the constructor.  Policy objects are immutable,
    but a new Policy object can be created with only certain values changed by
    calling the clone method of the Policy instance with keyword arguments.
    Policy objects can also be added, producing a new Policy object in which
    the non-default attributes set in the right hand operand overwrite those
    specified in the left operand.

    Settable attributes:

    raise_on_defect     -- If true, then defects should be raised as errors.
                           Default: False.

    linesep             -- string containing the value to use as separation
                           between output lines.  Default '\n'.

    cte_type            -- Type of allowed content transfer encodings

                           7bit  -- ASCII only
                           8bit  -- Content-Transfer-Encoding: 8bit is allowed

                           Default: 8bit.  Also controls the disposition of
                           (RFC invalid) binary data in headers; see the
                           documentation of the fold_binary method.

    max_line_length     -- maximum length of lines, excluding 'linesep',
                           during serialization.  None or 0 means no line
                           wrapping is done.  Default is 78.

    """

    raise_on_defect = False
    linesep = '\n'
    cte_type = '8bit'
    max_line_length = 78

    def handle_defect(self, obj, defect):
        """Based on policy, either raise defect or call register_defect.

            handle_defect(obj, defect)

        defect should be a Defect subclass, but in any case must be an
        Exception subclass.  obj is the object on which the defect should be
        registered if it is not raised.  If the raise_on_defect is True, the
        defect is raised as an error, otherwise the object and the defect are
        passed to register_defect.

        This method is intended to be called by parsers that discover defects.
        The email package parsers always call it with Defect instances.

        """
        if self.raise_on_defect:
            raise defect
        self.register_defect(obj, defect)

    def register_defect(self, obj, defect):
        """Record 'defect' on 'obj'.

        Called by handle_defect if raise_on_defect is False.  This method is
        part of the Policy API so that Policy subclasses can implement custom
        defect handling.  The default implementation calls the append method of
        the defects attribute of obj.  The objects used by the email package by
        default that get passed to this method will always have a defects
        attribute with an append method.

        """
        obj.defects.append(defect)

    def header_max_count(self, name):
        """Return the maximum allowed number of headers named 'name'.

        Called when a header is added to a Message object.  If the returned
        value is not 0 or None, and there are already a number of headers with
        the name 'name' equal to the value returned, a ValueError is raised.

        Because the default behavior of Message's __setitem__ is to append the
        value to the list of headers, it is easy to create duplicate headers
        without realizing it.  This method allows certain headers to be limited
        in the number of instances of that header that may be added to a
        Message programmatically.  (The limit is not observed by the parser,
        which will faithfully produce as many headers as exist in the message
        being parsed.)

        The default implementation returns None for all header names.
        """
        return None

    @abc.abstractmethod
    def header_source_parse(self, sourcelines):
        """Given a list of linesep terminated strings constituting the lines of
        a single header, return the (name, value) tuple that should be stored
        in the model.  The input lines should retain their terminating linesep
        characters.  The lines passed in by the email package may contain
        surrogateescaped binary data.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_store_parse(self, name, value):
        """Given the header name and the value provided by the application
        program, return the (name, value) that should be stored in the model.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_fetch_parse(self, name, value):
        """Given the header name and the value from the model, return the value
        to be returned to the application program that is requesting that
        header.  The value passed in by the email package may contain
        surrogateescaped binary data if the lines were parsed by a BytesParser.
        The returned value should not contain any surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold(self, name, value):
        """Given the header name and the value from the model, return a string
        containing linesep characters that implement the folding of the header
        according to the policy controls.  The value passed in by the email
        package may contain surrogateescaped binary data if the lines were
        parsed by a BytesParser.  The returned value should not contain any
        surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold_binary(self, name, value):
        """Given the header name and the value from the model, return binary
        data containing linesep characters that implement the folding of the
        header according to the policy controls.  The value passed in by the
        email package may contain surrogateescaped binary data.

        """
        raise NotImplementedError
+
+
@_extend_docstrings
class Compat32(Policy):

    """+
    This particular policy is the backward compatibility Policy.  It
    replicates the behavior of the email package version 5.1.
    """

    def _sanitize_header(self, name, value):
        # Non-str values are assumed to already be header objects, and a str
        # without surrogates needs no treatment; both pass through untouched.
        if not isinstance(value, str) or not _has_surrogates(value):
            return value
        # Surrogates are present: return a Header that uses the unknown-8bit
        # charset to encode the bytes as encoded words.
        return header.Header(value, charset=_charset.UNKNOWN8BIT,
                             header_name=name)

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.

        """
        name, first = sourcelines[0].split(':', 1)
        continuation = ''.join(sourcelines[1:])
        value = (first.lstrip(' \t') + continuation).rstrip('\r\n')
        return (name, value)

    def header_store_parse(self, name, value):
        """+
        The name and value are returned unmodified.
        """
        return (name, value)

    def header_fetch_parse(self, name, value):
        """+
        If the value contains binary data, it is converted into a Header object
        using the unknown-8bit charset.  Otherwise it is returned unmodified.
        """
        return self._sanitize_header(name, value)

    def fold(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length.  Non-ASCII binary data are CTE encoded using the
        unknown-8bit charset.

        """
        return self._fold(name, value, sanitize=True)

    def fold_binary(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length.  If cte_type is 7bit, non-ascii binary data is CTE
        encoded using the unknown-8bit charset.  Otherwise the original source
        header is used, with its existing line breaks and/or binary data.

        """
        must_sanitize = self.cte_type == '7bit'
        text = self._fold(name, value, sanitize=must_sanitize)
        return text.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, sanitize):
        pieces = ['%s: ' % name]
        if not isinstance(value, str):
            # Assume it is a Header-like object.
            hdr = value
        elif not _has_surrogates(value):
            hdr = header.Header(value, header_name=name)
        elif sanitize:
            hdr = header.Header(value,
                                charset=_charset.UNKNOWN8BIT,
                                header_name=name)
        else:
            # If we have raw 8bit data in a byte string, we have no idea
            # what the encoding is.  There is no safe way to split this
            # string.  If it's ascii-subset, then we could do a normal
            # ascii split, but if it's multibyte then we could break the
            # string.  There's no way to know so the least harm seems to
            # be to not split the string and risk it being too long.
            pieces.append(value)
            hdr = None
        if hdr is not None:
            pieces.append(hdr.encode(linesep=self.linesep,
                                     maxlinelen=self.max_line_length))
        pieces.append(self.linesep)
        return ''.join(pieces)
+
+
+compat32 = Compat32()
diff --git a/Lib/email/architecture.rst b/Lib/email/architecture.rst
new file mode 100644
index 0000000..80d24fe
--- /dev/null
+++ b/Lib/email/architecture.rst
@@ -0,0 +1,216 @@
+:mod:`email` Package Architecture
+=================================
+
+Overview
+--------
+
+The email package consists of three major components:
+
+ Model
+ An object structure that represents an email message, and provides an
+ API for creating, querying, and modifying a message.
+
+ Parser
+ Takes a sequence of characters or bytes and produces a model of the
+ email message represented by those characters or bytes.
+
+ Generator
+ Takes a model and turns it into a sequence of characters or bytes. The
+ sequence can either be intended for human consumption (a printable
+ unicode string) or bytes suitable for transmission over the wire. In
+ the latter case all data is properly encoded using the content transfer
+ encodings specified by the relevant RFCs.
+
+Conceptually the package is organized around the model. The model provides both
+"external" APIs intended for use by application programs using the library,
+and "internal" APIs intended for use by the Parser and Generator components.
+This division is intentionally a bit fuzzy; the API described by this documentation
+is all a public, stable API. This allows for an application with special needs
+to implement its own parser and/or generator.
+
+In addition to the three major functional components, there is a fourth key
+component to the architecture:
+
+ Policy
+ An object that specifies various behavioral settings and carries
+ implementations of various behavior-controlling methods.
+
+The Policy framework provides a simple and convenient way to control the
+behavior of the library, making it possible for the library to be used in a
+very flexible fashion while leveraging the common code required to parse,
+represent, and generate message-like objects. For example, in addition to the
+default :rfc:`5322` email message policy, we also have a policy that manages
+HTTP headers in a fashion compliant with :rfc:`2616`. Individual policy
+controls, such as the maximum line length produced by the generator, can also
+be controlled individually to meet specialized application requirements.
+
+
+The Model
+---------
+
+The message model is implemented by the :class:`~email.message.Message` class.
+The model divides a message into the two fundamental parts discussed by the
+RFC: the header section and the body. The `Message` object acts as a
+pseudo-dictionary of named headers. Its dictionary interface provides
+convenient access to individual headers by name. However, all headers are kept
+internally in an ordered list, so that the information about the order of the
+headers in the original message is preserved.
+
+The `Message` object also has a `payload` that holds the body. A `payload` can
+be one of two things: data, or a list of `Message` objects. The latter is used
+to represent a multipart MIME message. Lists can be nested arbitrarily deeply
+in order to represent the message, with all terminal leaves having non-list
+data payloads.
+
+
+Message Lifecycle
+-----------------
+
+The general lifecycle of a message is:
+
+ Creation
+ A `Message` object can be created by a Parser, or it can be
+ instantiated as an empty message by an application.
+
+ Manipulation
+ The application may examine one or more headers, and/or the
+ payload, and it may modify one or more headers and/or
+ the payload. This may be done on the top level `Message`
+ object, or on any sub-object.
+
+ Finalization
+ The Model is converted into a unicode or binary stream,
+ or the model is discarded.
+
+
+
+Header Policy Control During Lifecycle
+--------------------------------------
+
+One of the major controls exerted by the Policy is the management of headers
+during the `Message` lifecycle. Most applications don't need to be aware of
+this.
+
+A header enters the model in one of two ways: via a Parser, or by being set to
+a specific value by an application program after the Model already exists.
+Similarly, a header exits the model in one of two ways: by being serialized by
+a Generator, or by being retrieved from a Model by an application program. The
+Policy object provides hooks for all four of these pathways.
+
+The model storage for headers is a list of (name, value) tuples.
+
+The Parser identifies headers during parsing, and passes them to the
+:meth:`~email.policy.Policy.header_source_parse` method of the Policy. The
+result of that method is the (name, value) tuple to be stored in the model.
+
+When an application program supplies a header value (for example, through the
+`Message` object `__setitem__` interface), the name and the value are passed to
+the :meth:`~email.policy.Policy.header_store_parse` method of the Policy, which
+returns the (name, value) tuple to be stored in the model.
+
+When an application program retrieves a header (through any of the dict or list
+interfaces of `Message`), the name and value are passed to the
+:meth:`~email.policy.Policy.header_fetch_parse` method of the Policy to
+obtain the value returned to the application.
+
+When a Generator requests a header during serialization, the name and value are
+passed to the :meth:`~email.policy.Policy.fold` method of the Policy, which
+returns a string containing line breaks in the appropriate places. The
+:attr:`~email.policy.Policy.cte_type` Policy control determines whether or
+not Content Transfer Encoding is performed on the data in the header. There is
+also a :meth:`~email.policy.Policy.fold_binary` method for use by generators
+that produce binary output, which returns the folded header as binary data,
+possibly folded at different places than the corresponding string would be.
+
+
+Handling Binary Data
+--------------------
+
+In an ideal world all message data would conform to the RFCs, meaning that the
+parser could decode the message into the idealized unicode message that the
+sender originally wrote. In the real world, the email package must also be
+able to deal with badly formatted messages, including messages containing
+non-ASCII characters that either have no indicated character set or are not
+valid characters in the indicated character set.
+
+Since email messages are *primarily* text data, and operations on message data
+are primarily text operations (except for binary payloads of course), the model
+stores all text data as unicode strings. Un-decodable binary inside text
+data is handled by using the `surrogateescape` error handler of the ASCII
+codec. As with the binary filenames the error handler was introduced to
+handle, this allows the email package to "carry" the binary data received
+during parsing along until the output stage, at which time it is regenerated
+in its original form.
+
+This carried binary data is almost entirely an implementation detail. The one
+place where it is visible in the API is in the "internal" API. A Parser must
+do the `surrogateescape` encoding of binary input data, and pass that data to
+the appropriate Policy method. The "internal" interface used by the Generator
+to access header values preserves the `surrogateescaped` bytes. All other
+interfaces convert the binary data either back into bytes or into a safe form
+(losing information in some cases).
+
+
+Backward Compatibility
+----------------------
+
+The :class:`~email.policy.Compat32` Policy provides backward
+compatibility with version 5.1 of the email package. It does this via the
+following implementation of the four+1 Policy methods described above:
+
+header_source_parse
+ Splits the first line on the colon to obtain the name, discards any spaces
+ after the colon, and joins the remainder of the line with all of the
+ remaining lines, preserving the linesep characters to obtain the value.
+ Trailing carriage return and/or linefeed characters are stripped from the
+ resulting value string.
+
+header_store_parse
+ Returns the name and value exactly as received from the application.
+
+header_fetch_parse
+ If the value contains any `surrogateescaped` binary data, return the value
+ as a :class:`~email.header.Header` object, using the character set
+ `unknown-8bit`. Otherwise just returns the value.
+
+fold
+ Uses :class:`~email.header.Header`'s folding to fold headers in the
+ same way the email 5.1 generator did.
+
+fold_binary
+ Same as fold, but encodes to 'ascii'.
+
+
+New Algorithm
+-------------
+
+header_source_parse
+ Same as legacy behavior.
+
+header_store_parse
+ Same as legacy behavior.
+
+header_fetch_parse
+ If the value is already a header object, returns it. Otherwise, parses the
+ value using the new parser, and returns the resulting object as the value.
+ `surrogateescaped` bytes get turned into unicode unknown character code
+ points.
+
+fold
+ Uses the new header folding algorithm, respecting the policy settings.
+ surrogateescaped bytes are encoded using the ``unknown-8bit`` charset for
+ ``cte_type=7bit`` or ``8bit``. Returns a string.
+
+ At some point there will also be a ``cte_type=unicode``, and for that
+ policy fold will serialize the idealized unicode message with RFC-like
+ folding, converting any surrogateescaped bytes into the unicode
+ unknown character glyph.
+
+fold_binary
+ Uses the new header folding algorithm, respecting the policy settings.
+ surrogateescaped bytes are encoded using the `unknown-8bit` charset for
+ ``cte_type=7bit``, and get turned back into bytes for ``cte_type=8bit``.
+ Returns bytes.
+
+ At some point there will also be a ``cte_type=unicode``, and for that
+ policy fold_binary will serialize the message according to :rfc:`5335`.
diff --git a/Lib/email/errors.py b/Lib/email/errors.py
index d52a624..791239f 100644
--- a/Lib/email/errors.py
+++ b/Lib/email/errors.py
@@ -5,7 +5,6 @@
"""email package exception classes."""
-
class MessageError(Exception):
"""Base class for errors in the email package."""
@@ -30,12 +29,13 @@ class CharsetError(MessageError):
"""An illegal charset was given."""
-
# These are parsing defects which the parser was able to work around.
-class MessageDefect:
+class MessageDefect(ValueError):
"""Base class for a message defect."""
def __init__(self, line=None):
+ if line is not None:
+ super().__init__(line)
self.line = line
class NoBoundaryInMultipartDefect(MessageDefect):
@@ -44,14 +44,64 @@ class NoBoundaryInMultipartDefect(MessageDefect):
class StartBoundaryNotFoundDefect(MessageDefect):
"""The claimed start boundary was never found."""
+class CloseBoundaryNotFoundDefect(MessageDefect):
+ """A start boundary was found, but not the corresponding close boundary."""
+
class FirstHeaderLineIsContinuationDefect(MessageDefect):
"""A message had a continuation line as its first header line."""
class MisplacedEnvelopeHeaderDefect(MessageDefect):
"""A 'Unix-from' header was found in the middle of a header block."""
-class MalformedHeaderDefect(MessageDefect):
- """Found a header that was missing a colon, or was otherwise malformed."""
+class MissingHeaderBodySeparatorDefect(MessageDefect):
+ """Found line with no leading whitespace and no colon before blank line."""
+# XXX: backward compatibility, just in case (it was never emitted).
+MalformedHeaderDefect = MissingHeaderBodySeparatorDefect
class MultipartInvariantViolationDefect(MessageDefect):
"""A message claimed to be a multipart but no subparts were found."""
+
+class InvalidMultipartContentTransferEncodingDefect(MessageDefect):
+ """An invalid content transfer encoding was set on the multipart itself."""
+
+class UndecodableBytesDefect(MessageDefect):
+ """Header contained bytes that could not be decoded"""
+
+class InvalidBase64PaddingDefect(MessageDefect):
+ """base64 encoded sequence had an incorrect length"""
+
+class InvalidBase64CharactersDefect(MessageDefect):
+ """base64 encoded sequence had characters not in base64 alphabet"""
+
+# These errors are specific to header parsing.
+
+class HeaderDefect(MessageDefect):
+ """Base class for a header defect."""
+
+ def __init__(self, *args, **kw):
+ super().__init__(*args, **kw)
+
+class InvalidHeaderDefect(HeaderDefect):
+ """Header is not valid, message gives details."""
+
+class HeaderMissingRequiredValue(HeaderDefect):
+ """A header that must have a value had none"""
+
+class NonPrintableDefect(HeaderDefect):
+ """ASCII characters outside the ascii-printable range found"""
+
+ def __init__(self, non_printables):
+ super().__init__(non_printables)
+ self.non_printables = non_printables
+
+ def __str__(self):
+ return ("the following ASCII non-printables found in header: "
+ "{}".format(self.non_printables))
+
+class ObsoleteHeaderDefect(HeaderDefect):
+ """Header uses syntax declared obsolete by RFC 5322"""
+
+class NonASCIILocalPartDefect(HeaderDefect):
+ """local_part contains non-ASCII characters"""
+ # This defect only occurs during unicode parsing, not when
+ # parsing messages decoded from binary.
diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
index aa8a2ff..56f50df 100644
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -25,6 +25,7 @@ import re
from email import errors
from email import message
+from email._policybase import compat32
NLCRE = re.compile('\r\n|\r|\n')
NLCRE_bol = re.compile('(\r\n|\r|\n)')
@@ -120,9 +121,6 @@ class BufferedSubFile(object):
# Reverse and insert at the front of the lines.
self._lines[:0] = lines[::-1]
- def is_closed(self):
- return self._closed
-
def __iter__(self):
return self
@@ -137,9 +135,22 @@ class BufferedSubFile(object):
class FeedParser:
"""A feed-style parser of email."""
- def __init__(self, _factory=message.Message):
- """_factory is called with no arguments to create a new message obj"""
+ def __init__(self, _factory=message.Message, *, policy=compat32):
+ """_factory is called with no arguments to create a new message obj
+
+ The policy keyword specifies a policy object that controls a number of
+ aspects of the parser's operation. The default policy maintains
+ backward compatibility.
+
+ """
self._factory = _factory
+ self.policy = policy
+ try:
+ _factory(policy=self.policy)
+ self._factory_kwds = lambda: {'policy': self.policy}
+ except TypeError:
+ # Assume this is an old-style factory
+ self._factory_kwds = lambda: {}
self._input = BufferedSubFile()
self._msgstack = []
self._parse = self._parsegen().__next__
@@ -171,11 +182,12 @@ class FeedParser:
# Look for final set of defects
if root.get_content_maintype() == 'multipart' \
and not root.is_multipart():
- root.defects.append(errors.MultipartInvariantViolationDefect())
+ defect = errors.MultipartInvariantViolationDefect()
+ self.policy.handle_defect(root, defect)
return root
def _new_message(self):
- msg = self._factory()
+ msg = self._factory(**self._factory_kwds())
if self._cur and self._cur.get_content_type() == 'multipart/digest':
msg.set_default_type('message/rfc822')
if self._msgstack:
@@ -207,6 +219,8 @@ class FeedParser:
# (i.e. newline), just throw it away. Otherwise the line is
# part of the body so push it back.
if not NLCRE.match(line):
+ defect = errors.MissingHeaderBodySeparatorDefect()
+ self.policy.handle_defect(self._cur, defect)
self._input.unreadline(line)
break
headers.append(line)
@@ -284,7 +298,8 @@ class FeedParser:
# defined a boundary. That's a problem which we'll handle by
# reading everything until the EOF and marking the message as
# defective.
- self._cur.defects.append(errors.NoBoundaryInMultipartDefect())
+ defect = errors.NoBoundaryInMultipartDefect()
+ self.policy.handle_defect(self._cur, defect)
lines = []
for line in self._input:
if line is NeedMoreData:
@@ -293,6 +308,11 @@ class FeedParser:
lines.append(line)
self._cur.set_payload(EMPTYSTRING.join(lines))
return
+ # Make sure a valid transfer encoding was specified per RFC 2045:6.4.
+ if (self._cur.get('content-transfer-encoding', '8bit').lower()
+ not in ('7bit', '8bit', 'binary')):
+ defect = errors.InvalidMultipartContentTransferEncodingDefect()
+ self.policy.handle_defect(self._cur, defect)
# Create a line match predicate which matches the inter-part
# boundary as well as the end-of-multipart boundary. Don't push
# this onto the input stream until we've scanned past the
@@ -304,6 +324,7 @@ class FeedParser:
capturing_preamble = True
preamble = []
linesep = False
+ close_boundary_seen = False
while True:
line = self._input.readline()
if line is NeedMoreData:
@@ -318,6 +339,7 @@ class FeedParser:
# the closing boundary, then we need to initialize the
# epilogue with the empty string (see below).
if mo.group('end'):
+ close_boundary_seen = True
linesep = mo.group('linesep')
break
# We saw an inter-part boundary. Were we in the preamble?
@@ -386,9 +408,9 @@ class FeedParser:
# We've seen either the EOF or the end boundary. If we're still
# capturing the preamble, we never saw the start boundary. Note
# that as a defect and store the captured text as the payload.
- # Everything from here to the EOF is epilogue.
if capturing_preamble:
- self._cur.defects.append(errors.StartBoundaryNotFoundDefect())
+ defect = errors.StartBoundaryNotFoundDefect()
+ self.policy.handle_defect(self._cur, defect)
self._cur.set_payload(EMPTYSTRING.join(preamble))
epilogue = []
for line in self._input:
@@ -397,8 +419,15 @@ class FeedParser:
continue
self._cur.epilogue = EMPTYSTRING.join(epilogue)
return
- # If the end boundary ended in a newline, we'll need to make sure
- # the epilogue isn't None
+ # If we're not processing the preamble, then we might have seen
+ # EOF without seeing that end boundary...that is also a defect.
+ if not close_boundary_seen:
+ defect = errors.CloseBoundaryNotFoundDefect()
+ self.policy.handle_defect(self._cur, defect)
+ return
+ # Everything from here to the EOF is epilogue. If the end boundary
+ # ended in a newline, we'll need to make sure the epilogue isn't
+ # None
if linesep:
epilogue = ['']
else:
@@ -440,14 +469,12 @@ class FeedParser:
# is illegal, so let's note the defect, store the illegal
# line, and ignore it for purposes of headers.
defect = errors.FirstHeaderLineIsContinuationDefect(line)
- self._cur.defects.append(defect)
+ self.policy.handle_defect(self._cur, defect)
continue
lastvalue.append(line)
continue
if lastheader:
- # XXX reconsider the joining of folded lines
- lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
- self._cur[lastheader] = lhdr
+ self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
lastheader, lastvalue = '', []
# Check for envelope header, i.e. unix-from
if line.startswith('From '):
@@ -471,19 +498,17 @@ class FeedParser:
self._cur.defects.append(defect)
continue
# Split the line on the colon separating field name from value.
+ # There will always be a colon, because if there weren't, the part
+ # of the parser that calls us would have started parsing the body.
i = line.find(':')
- if i < 0:
- defect = errors.MalformedHeaderDefect(line)
- self._cur.defects.append(defect)
- continue
+ assert i>0, "_parse_headers fed line with no : and no leading WS"
lastheader = line[:i]
- lastvalue = [line[i+1:].lstrip()]
+ lastvalue = [line]
# Done with all the lines, so handle the last header.
if lastheader:
- # XXX reconsider the joining of folded lines
- self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
+ self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
+
-
class BytesFeedParser(FeedParser):
"""Like FeedParser, but feed accepts bytes."""
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index c6bfb70..899adbc 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -13,8 +13,10 @@ import random
import warnings
from io import StringIO, BytesIO
+from email._policybase import compat32
from email.header import Header
-from email.message import _has_surrogates
+from email.utils import _has_surrogates
+import email.charset as _charset
UNDERSCORE = '_'
NL = '\n' # XXX: no longer used by the code below.
@@ -33,7 +35,8 @@ class Generator:
# Public interface
#
- def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
+ def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *,
+ policy=None):
"""Create the generator for message flattening.
outfp is the output file-like object for writing the message to. It
@@ -49,16 +52,22 @@ class Generator:
defined in the Header class. Set maxheaderlen to zero to disable
header wrapping. The default is 78, as recommended (but not required)
by RFC 2822.
+
+ The policy keyword specifies a policy object that controls a number of
+ aspects of the generator's operation. The default policy maintains
+ backward compatibility.
+
"""
self._fp = outfp
self._mangle_from_ = mangle_from_
- self._maxheaderlen = maxheaderlen
+ self.maxheaderlen = maxheaderlen
+ self.policy = policy
def write(self, s):
# Just delegate to the file object
self._fp.write(s)
- def flatten(self, msg, unixfrom=False, linesep='\n'):
+ def flatten(self, msg, unixfrom=False, linesep=None):
r"""Print the message object tree rooted at msg to the output file
specified when the Generator instance was created.
@@ -70,29 +79,47 @@ class Generator:
Note that for subobjects, no From_ line is printed.
linesep specifies the characters used to indicate a new line in
- the output. The default value is the most useful for typical
- Python applications, but it can be set to \r\n to produce RFC-compliant
- line separators when needed.
+ the output. The default value is determined by the policy.
"""
# We use the _XXX constants for operating on data that comes directly
# from the msg, and _encoded_XXX constants for operating on data that
# has already been converted (to bytes in the BytesGenerator) and
# inserted into a temporary buffer.
- self._NL = linesep
- self._encoded_NL = self._encode(linesep)
+ policy = msg.policy if self.policy is None else self.policy
+ if linesep is not None:
+ policy = policy.clone(linesep=linesep)
+ if self.maxheaderlen is not None:
+ policy = policy.clone(max_line_length=self.maxheaderlen)
+ self._NL = policy.linesep
+ self._encoded_NL = self._encode(self._NL)
self._EMPTY = ''
self._encoded_EMTPY = self._encode('')
- if unixfrom:
- ufrom = msg.get_unixfrom()
- if not ufrom:
- ufrom = 'From nobody ' + time.ctime(time.time())
- self.write(ufrom + self._NL)
- self._write(msg)
+ # Because we use clone (below) when we recursively process message
+ # subparts, and because clone uses the computed policy (not None),
+ # submessages will automatically get set to the computed policy when
+ # they are processed by this code.
+ old_gen_policy = self.policy
+ old_msg_policy = msg.policy
+ try:
+ self.policy = policy
+ msg.policy = policy
+ if unixfrom:
+ ufrom = msg.get_unixfrom()
+ if not ufrom:
+ ufrom = 'From nobody ' + time.ctime(time.time())
+ self.write(ufrom + self._NL)
+ self._write(msg)
+ finally:
+ self.policy = old_gen_policy
+ msg.policy = old_msg_policy
def clone(self, fp):
"""Clone this generator with the exact same options."""
- return self.__class__(fp, self._mangle_from_, self._maxheaderlen)
+ return self.__class__(fp,
+ self._mangle_from_,
+ None, # Use policy setting, which we've adjusted
+ policy=self.policy)
#
# Protected interface - undocumented ;/
@@ -167,16 +194,8 @@ class Generator:
#
def _write_headers(self, msg):
- for h, v in msg.items():
- self.write('%s: ' % h)
- if isinstance(v, Header):
- self.write(v.encode(
- maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL)
- else:
- # Header's got lots of smarts, so use it.
- header = Header(v, maxlinelen=self._maxheaderlen,
- header_name=h)
- self.write(header.encode(linesep=self._NL)+self._NL)
+ for h, v in msg.raw_items():
+ self.write(self.policy.fold(h, v))
# A blank line always separates headers from body
self.write(self._NL)
@@ -265,12 +284,12 @@ class Generator:
# The contents of signed parts has to stay unmodified in order to keep
# the signature intact per RFC1847 2.1, so we disable header wrapping.
# RDM: This isn't enough to completely preserve the part, but it helps.
- old_maxheaderlen = self._maxheaderlen
+ p = self.policy
+ self.policy = p.clone(max_line_length=0)
try:
- self._maxheaderlen = 0
self._handle_multipart(msg)
finally:
- self._maxheaderlen = old_maxheaderlen
+ self.policy = p
def _handle_message_delivery_status(self, msg):
# We can't just write the headers directly to self's file object
@@ -305,10 +324,12 @@ class Generator:
# message/rfc822. Such messages are generated by, for example,
# Groupwise when forwarding unadorned messages. (Issue 7970.) So
# in that case we just emit the string body.
- payload = msg.get_payload()
+ payload = msg._payload
if isinstance(payload, list):
g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
payload = s.getvalue()
+ else:
+ payload = self._encode(payload)
self._fp.write(payload)
# This used to be a module level function; we use a classmethod for this
@@ -344,7 +365,10 @@ class BytesGenerator(Generator):
Functionally identical to the base Generator except that the output is
bytes and not string. When surrogates were used in the input to encode
- bytes, these are decoded back to bytes for output.
+ bytes, these are decoded back to bytes for output. If the policy has
+ cte_type set to 7bit, then the message is transformed such that the
+ non-ASCII bytes are properly content transfer encoded, using the charset
+ unknown-8bit.
The outfp object must accept bytes in its write method.
"""
@@ -365,23 +389,8 @@ class BytesGenerator(Generator):
def _write_headers(self, msg):
# This is almost the same as the string version, except for handling
# strings with 8bit bytes.
- for h, v in msg._headers:
- self.write('%s: ' % h)
- if isinstance(v, Header):
- self.write(v.encode(maxlinelen=self._maxheaderlen)+self._NL)
- elif _has_surrogates(v):
- # If we have raw 8bit data in a byte string, we have no idea
- # what the encoding is. There is no safe way to split this
- # string. If it's ascii-subset, then we could do a normal
- # ascii split, but if it's multibyte then we could break the
- # string. There's no way to know so the least harm seems to
- # be to not split the string and risk it being too long.
- self.write(v+NL)
- else:
- # Header's got lots of smarts and this string is safe...
- header = Header(v, maxlinelen=self._maxheaderlen,
- header_name=h)
- self.write(header.encode(linesep=self._NL)+self._NL)
+ for h, v in msg.raw_items():
+ self._fp.write(self.policy.fold_binary(h, v))
# A blank line always separates headers from body
self.write(self._NL)
@@ -390,7 +399,7 @@ class BytesGenerator(Generator):
# just write it back out.
if msg._payload is None:
return
- if _has_surrogates(msg._payload):
+ if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
if self._mangle_from_:
msg._payload = fcre.sub(">From ", msg._payload)
self.write(msg._payload)
diff --git a/Lib/email/header.py b/Lib/email/header.py
index 3250d36..a89219d 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -40,7 +40,6 @@ ecre = re.compile(r'''
\? # literal ?
(?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
\?= # literal ?=
- (?=[ \t]|$) # whitespace or the end of the string
''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
# Field name regexp, including trailing colon, but not separating whitespace,
@@ -86,8 +85,12 @@ def decode_header(header):
words = []
for line in header.splitlines():
parts = ecre.split(line)
+ first = True
while parts:
- unencoded = parts.pop(0).strip()
+ unencoded = parts.pop(0)
+ if first:
+ unencoded = unencoded.lstrip()
+ first = False
if unencoded:
words.append((unencoded, None, None))
if parts:
@@ -95,6 +98,16 @@ def decode_header(header):
encoding = parts.pop(0).lower()
encoded = parts.pop(0)
words.append((encoded, encoding, charset))
+ # Now loop over words and remove words that consist of whitespace
+ # between two encoded strings.
+ import sys
+ droplist = []
+ for n, w in enumerate(words):
+ if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
+ droplist.append(n-1)
+ for d in reversed(droplist):
+ del words[d]
+
# The next step is to decode each encoded word by applying the reverse
# base64 or quopri transformation. decoded_words is now a list of the
# form (decoded_word, charset).
@@ -217,22 +230,27 @@ class Header:
self._normalize()
uchunks = []
lastcs = None
+ lastspace = None
for string, charset in self._chunks:
# We must preserve spaces between encoded and non-encoded word
# boundaries, which means for us we need to add a space when we go
# from a charset to None/us-ascii, or from None/us-ascii to a
# charset. Only do this for the second and subsequent chunks.
+ # Don't add a space if the None/us-ascii string already has
+ # a space (trailing or leading depending on transition)
nextcs = charset
if nextcs == _charset.UNKNOWN8BIT:
original_bytes = string.encode('ascii', 'surrogateescape')
string = original_bytes.decode('ascii', 'replace')
if uchunks:
+ hasspace = string and self._nonctext(string[0])
if lastcs not in (None, 'us-ascii'):
- if nextcs in (None, 'us-ascii'):
+ if nextcs in (None, 'us-ascii') and not hasspace:
uchunks.append(SPACE)
nextcs = None
- elif nextcs not in (None, 'us-ascii'):
+ elif nextcs not in (None, 'us-ascii') and not lastspace:
uchunks.append(SPACE)
+ lastspace = string and self._nonctext(string[-1])
lastcs = nextcs
uchunks.append(string)
return EMPTYSTRING.join(uchunks)
@@ -291,6 +309,11 @@ class Header:
charset = UTF8
self._chunks.append((s, charset))
+ def _nonctext(self, s):
+ """True if string s is not a ctext character of RFC822.
+ """
+ return s.isspace() or s in ('(', ')', '\\')
+
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
r"""Encode a message header into an RFC-compliant format.
@@ -334,7 +357,20 @@ class Header:
maxlinelen = 1000000
formatter = _ValueFormatter(self._headerlen, maxlinelen,
self._continuation_ws, splitchars)
+ lastcs = None
+ hasspace = lastspace = None
for string, charset in self._chunks:
+ if hasspace is not None:
+ hasspace = string and self._nonctext(string[0])
+ import sys
+ if lastcs not in (None, 'us-ascii'):
+ if not hasspace or charset not in (None, 'us-ascii'):
+ formatter.add_transition()
+ elif charset not in (None, 'us-ascii') and not lastspace:
+ formatter.add_transition()
+ lastspace = string and self._nonctext(string[-1])
+ lastcs = charset
+ hasspace = False
lines = string.splitlines()
if lines:
formatter.feed('', lines[0], charset)
@@ -351,6 +387,7 @@ class Header:
formatter.feed(fws, sline, charset)
if len(lines) > 1:
formatter.newline()
+ if self._chunks:
formatter.add_transition()
value = formatter._str(linesep)
if _embeded_header.search(value):
diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py
new file mode 100644
index 0000000..1fae950
--- /dev/null
+++ b/Lib/email/headerregistry.py
@@ -0,0 +1,583 @@
+"""Representing and manipulating email headers via custom objects.
+
+This module provides an implementation of the HeaderRegistry API.
+The implementation is designed to flexibly follow RFC5322 rules.
+
+Eventually HeaderRegistry will be a public API, but it isn't yet,
+and will probably change some before that happens.
+
+"""
+
+from email import utils
+from email import errors
+from email import _header_value_parser as parser
+
+class Address:
+
+ def __init__(self, display_name='', username='', domain='', addr_spec=None):
+ """Create an object representing a full email address.
+
+ An address can have a 'display_name', a 'username', and a 'domain'. In
+ addition to specifying the username and domain separately, they may be
+ specified together by using the addr_spec keyword *instead of* the
+ username and domain keywords. If an addr_spec string is specified it
+ must be properly quoted according to RFC 5322 rules; an error will be
+ raised if it is not.
+
+ An Address object has display_name, username, domain, and addr_spec
+ attributes, all of which are read-only. The addr_spec and the string
+ value of the object are both quoted according to RFC5322 rules, but
+ without any Content Transfer Encoding.
+
+ """
+ # This clause with its potential 'raise' may only happen when an
+ # application program creates an Address object using an addr_spec
+ # keyword. The email library code itself must always supply username
+ # and domain.
+ if addr_spec is not None:
+ if username or domain:
+ raise TypeError("addrspec specified when username and/or "
+ "domain also specified")
+ a_s, rest = parser.get_addr_spec(addr_spec)
+ if rest:
+ raise ValueError("Invalid addr_spec; only '{}' "
+ "could be parsed from '{}'".format(
+ a_s, addr_spec))
+ if a_s.all_defects:
+ raise a_s.all_defects[0]
+ username = a_s.local_part
+ domain = a_s.domain
+ self._display_name = display_name
+ self._username = username
+ self._domain = domain
+
+ @property
+ def display_name(self):
+ return self._display_name
+
+ @property
+ def username(self):
+ return self._username
+
+ @property
+ def domain(self):
+ return self._domain
+
+ @property
+ def addr_spec(self):
+ """The addr_spec (username@domain) portion of the address, quoted
+ according to RFC 5322 rules, but with no Content Transfer Encoding.
+ """
+ nameset = set(self.username)
+ if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS):
+ lp = parser.quote_string(self.username)
+ else:
+ lp = self.username
+ if self.domain:
+ return lp + '@' + self.domain
+ if not lp:
+ return '<>'
+ return lp
+
+ def __repr__(self):
+ return "Address(display_name={!r}, username={!r}, domain={!r})".format(
+ self.display_name, self.username, self.domain)
+
+ def __str__(self):
+ nameset = set(self.display_name)
+ if len(nameset) > len(nameset-parser.SPECIALS):
+ disp = parser.quote_string(self.display_name)
+ else:
+ disp = self.display_name
+ if disp:
+ addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
+ return "{} <{}>".format(disp, addr_spec)
+ return self.addr_spec
+
+ def __eq__(self, other):
+ if type(other) != type(self):
+ return False
+ return (self.display_name == other.display_name and
+ self.username == other.username and
+ self.domain == other.domain)
+
+
+class Group:
+
+ def __init__(self, display_name=None, addresses=None):
+ """Create an object representing an address group.
+
+ An address group consists of a display_name followed by a colon and a
+ list of addresses (see Address) terminated by a semi-colon. The Group
+ is created by specifying a display_name and a possibly empty list of
+ Address objects. A Group can also be used to represent a single
+ address that is not in a group, which is convenient when manipulating
+ lists that are a combination of Groups and individual Addresses. In
+ this case the display_name should be set to None. In particular, the
+ string representation of a Group whose display_name is None is the same
+ as the Address object, if there is one and only one Address object in
+ the addresses list.
+
+ """
+ self._display_name = display_name
+ self._addresses = tuple(addresses) if addresses else tuple()
+
+ @property
+ def display_name(self):
+ return self._display_name
+
+ @property
+ def addresses(self):
+ return self._addresses
+
+ def __repr__(self):
+ return "Group(display_name={!r}, addresses={!r}".format(
+ self.display_name, self.addresses)
+
+ def __str__(self):
+ if self.display_name is None and len(self.addresses)==1:
+ return str(self.addresses[0])
+ disp = self.display_name
+ if disp is not None:
+ nameset = set(disp)
+ if len(nameset) > len(nameset-parser.SPECIALS):
+ disp = parser.quote_string(disp)
+ adrstr = ", ".join(str(x) for x in self.addresses)
+ adrstr = ' ' + adrstr if adrstr else adrstr
+ return "{}:{};".format(disp, adrstr)
+
+ def __eq__(self, other):
+ if type(other) != type(self):
+ return False
+ return (self.display_name == other.display_name and
+ self.addresses == other.addresses)
+
+
+# Header Classes #
+
+class BaseHeader(str):
+
+ """Base class for message headers.
+
+ Implements generic behavior and provides tools for subclasses.
+
+ A subclass must define a classmethod named 'parse' that takes an unfolded
+ value string and a dictionary as its arguments. The dictionary will
+ contain one key, 'defects', initialized to an empty list. After the call
+ the dictionary must contain two additional keys: parse_tree, set to the
+ parse tree obtained from parsing the header, and 'decoded', set to the
+ string value of the idealized representation of the data from the value.
+ (That is, encoded words are decoded, and values that have canonical
+ representations are so represented.)
+
+ The defects key is intended to collect parsing defects, which the message
+ parser will subsequently dispose of as appropriate. The parser should not,
+ insofar as practical, raise any errors. Defects should be added to the
+ list instead. The standard header parsers register defects for RFC
+ compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
+ errors.
+
+ The parse method may add additional keys to the dictionary. In this case
+ the subclass must define an 'init' method, which will be passed the
+ dictionary as its keyword arguments. The method should use (usually by
+ setting them as the value of similarly named attributes) and remove all the
+ extra keys added by its parse method, and then use super to call its parent
+ class with the remaining arguments and keywords.
+
+ The subclass should also make sure that a 'max_count' attribute is defined
+ that is either None or 1. XXX: need to better define this API.
+
+ """
+
+ def __new__(cls, name, value):
+ kwds = {'defects': []}
+ cls.parse(value, kwds)
+ if utils._has_surrogates(kwds['decoded']):
+ kwds['decoded'] = utils._sanitize(kwds['decoded'])
+ self = str.__new__(cls, kwds['decoded'])
+ del kwds['decoded']
+ self.init(name, **kwds)
+ return self
+
+ def init(self, name, *, parse_tree, defects):
+ self._name = name
+ self._parse_tree = parse_tree
+ self._defects = defects
+
+ @property
+ def name(self):
+ return self._name
+
+ @property
+ def defects(self):
+ return tuple(self._defects)
+
+ def __reduce__(self):
+ return (
+ _reconstruct_header,
+ (
+ self.__class__.__name__,
+ self.__class__.__bases__,
+ str(self),
+ ),
+ self.__dict__)
+
+ @classmethod
+ def _reconstruct(cls, value):
+ return str.__new__(cls, value)
+
+ def fold(self, *, policy):
+ """Fold header according to policy.
+
+ The parsed representation of the header is folded according to
+ RFC5322 rules, as modified by the policy. If the parse tree
+ contains surrogateescaped bytes, the bytes are CTE encoded using
+ the charset 'unknown-8bit'.
+
+ Any non-ASCII characters in the parse tree are CTE encoded using
+ charset utf-8. XXX: make this a policy setting.
+
+ The returned value is an ASCII-only string possibly containing linesep
+ characters, and ending with a linesep character. The string includes
+ the header name and the ': ' separator.
+
+ """
+ # At some point we need to only put fws here if it was in the source.
+ header = parser.Header([
+ parser.HeaderLabel([
+ parser.ValueTerminal(self.name, 'header-name'),
+ parser.ValueTerminal(':', 'header-sep')]),
+ parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]),
+ self._parse_tree])
+ return header.fold(policy=policy)
+
+
+def _reconstruct_header(cls_name, bases, value):
+ return type(cls_name, bases, {})._reconstruct(value)
+
+
+class UnstructuredHeader:
+
+ max_count = None
+ value_parser = staticmethod(parser.get_unstructured)
+
+ @classmethod
+ def parse(cls, value, kwds):
+ kwds['parse_tree'] = cls.value_parser(value)
+ kwds['decoded'] = str(kwds['parse_tree'])
+
+
+class UniqueUnstructuredHeader(UnstructuredHeader):
+
+ max_count = 1
+
+
+class DateHeader:
+
+ """Header whose value consists of a single timestamp.
+
+ Provides an additional attribute, datetime, which is either an aware
+ datetime using a timezone, or a naive datetime if the timezone
+ in the input string is -0000. Also accepts a datetime as input.
+ The 'value' attribute is the normalized form of the timestamp,
+ which means it is the output of format_datetime on the datetime.
+ """
+
+ max_count = None
+
+ # This is used only for folding, not for creating 'decoded'.
+ value_parser = staticmethod(parser.get_unstructured)
+
+ @classmethod
+ def parse(cls, value, kwds):
+ if not value:
+ kwds['defects'].append(errors.HeaderMissingRequiredValue())
+ kwds['datetime'] = None
+ kwds['decoded'] = ''
+ kwds['parse_tree'] = parser.TokenList()
+ return
+ if isinstance(value, str):
+ value = utils.parsedate_to_datetime(value)
+ kwds['datetime'] = value
+ kwds['decoded'] = utils.format_datetime(kwds['datetime'])
+ kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
+
+ def init(self, *args, **kw):
+ self._datetime = kw.pop('datetime')
+ super().init(*args, **kw)
+
+ @property
+ def datetime(self):
+ return self._datetime
+
+
+class UniqueDateHeader(DateHeader):
+
+ max_count = 1
+
+
+class AddressHeader:
+
+ max_count = None
+
+ @staticmethod
+ def value_parser(value):
+ address_list, value = parser.get_address_list(value)
+ assert not value, 'this should not happen'
+ return address_list
+
+ @classmethod
+ def parse(cls, value, kwds):
+ if isinstance(value, str):
+ # We are translating here from the RFC language (address/mailbox)
+ # to our API language (group/address).
+ kwds['parse_tree'] = address_list = cls.value_parser(value)
+ groups = []
+ for addr in address_list.addresses:
+ groups.append(Group(addr.display_name,
+ [Address(mb.display_name or '',
+ mb.local_part or '',
+ mb.domain or '')
+ for mb in addr.all_mailboxes]))
+ defects = list(address_list.all_defects)
+ else:
+ # Assume it is Address/Group stuff
+ if not hasattr(value, '__iter__'):
+ value = [value]
+ groups = [Group(None, [item]) if not hasattr(item, 'addresses')
+ else item
+ for item in value]
+ defects = []
+ kwds['groups'] = groups
+ kwds['defects'] = defects
+ kwds['decoded'] = ', '.join([str(item) for item in groups])
+ if 'parse_tree' not in kwds:
+ kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
+
+ def init(self, *args, **kw):
+ self._groups = tuple(kw.pop('groups'))
+ self._addresses = None
+ super().init(*args, **kw)
+
+ @property
+ def groups(self):
+ return self._groups
+
+ @property
+ def addresses(self):
+ if self._addresses is None:
+ self._addresses = tuple([address for group in self._groups
+ for address in group.addresses])
+ return self._addresses
+
+
+class UniqueAddressHeader(AddressHeader):
+
+ max_count = 1
+
+
+class SingleAddressHeader(AddressHeader):
+
+ @property
+ def address(self):
+ if len(self.addresses)!=1:
+ raise ValueError(("value of single address header {} is not "
+ "a single address").format(self.name))
+ return self.addresses[0]
+
+
+class UniqueSingleAddressHeader(SingleAddressHeader):
+
+ max_count = 1
+
+
+class MIMEVersionHeader:
+
+    """Mixin for the MIME-Version header exposing version, major and minor."""
+
+    max_count = 1
+
+    value_parser = staticmethod(parser.parse_mime_version)
+
+    @classmethod
+    def parse(cls, value, kwds):
+        """Parse value and store the results in kwds.
+
+        Sets 'parse_tree', 'decoded', 'major', 'minor' and 'version', and
+        extends the existing kwds['defects'] list with the parser's defects.
+        """
+        tree = cls.value_parser(value)
+        kwds['parse_tree'] = tree
+        kwds['decoded'] = str(tree)
+        kwds['defects'].extend(tree.all_defects)
+        if tree.minor is None:
+            # Without a minor number the whole version is reported as
+            # absent, so major is forced to None as well.
+            kwds['major'] = None
+            kwds['minor'] = tree.minor
+            kwds['version'] = None
+        else:
+            kwds['major'] = tree.major
+            kwds['minor'] = tree.minor
+            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
+
+    def init(self, *args, **kw):
+        # Consume the keywords produced by parse() before delegating.
+        version = kw.pop('version')
+        major = kw.pop('major')
+        minor = kw.pop('minor')
+        self._version = version
+        self._major = major
+        self._minor = minor
+        super().init(*args, **kw)
+
+    @property
+    def major(self):
+        """Major version number, or None if no complete version was parsed."""
+        return self._major
+
+    @property
+    def minor(self):
+        """Minor version number, or None if it was missing."""
+        return self._minor
+
+    @property
+    def version(self):
+        """The 'major.minor' string, or None if no minor number was parsed."""
+        return self._version
+
+
+class ParameterizedMIMEHeader:
+
+    # Mixin that handles the params dict.  Subclasses must supply a
+    # value_parser attribute for their specific header.
+
+    max_count = 1
+
+    @classmethod
+    def parse(cls, value, kwds):
+        """Parse value and store 'parse_tree', 'decoded' and 'params' in kwds.
+
+        Parser defects are appended to the existing kwds['defects'] list.
+        Parameter names are sanitized and lower-cased; values are sanitized.
+        """
+        tree = cls.value_parser(value)
+        kwds['parse_tree'] = tree
+        kwds['decoded'] = str(tree)
+        kwds['defects'].extend(tree.all_defects)
+        collected = {}
+        if tree.params is not None:
+            # The MIME RFCs specify that parameter ordering is arbitrary.
+            for raw_name, raw_value in tree.params:
+                key = utils._sanitize(raw_name).lower()
+                collected[key] = utils._sanitize(raw_value)
+        kwds['params'] = collected
+
+    def init(self, *args, **kw):
+        # Consume 'params' before handing the rest to the next init().
+        params = kw.pop('params')
+        self._params = params
+        super().init(*args, **kw)
+
+    @property
+    def params(self):
+        """Return a copy of the parameter dict, so callers cannot mutate it."""
+        snapshot = self._params.copy()
+        return snapshot
+
+
+class ContentTypeHeader(ParameterizedMIMEHeader):
+
+    """Content-Type header exposing maintype, subtype and content_type."""
+
+    value_parser = staticmethod(parser.parse_content_type_header)
+
+    def init(self, *args, **kw):
+        # The base init() must run first: the type fields are read from
+        # self._parse_tree afterwards.
+        super().init(*args, **kw)
+        tree = self._parse_tree
+        self._maintype = utils._sanitize(tree.maintype)
+        self._subtype = utils._sanitize(tree.subtype)
+
+    @property
+    def maintype(self):
+        """Sanitized main type taken from the parse tree."""
+        return self._maintype
+
+    @property
+    def subtype(self):
+        """Sanitized subtype taken from the parse tree."""
+        return self._subtype
+
+    @property
+    def content_type(self):
+        """The 'maintype/subtype' string."""
+        main, sub = self.maintype, self.subtype
+        return main + '/' + sub
+
+
+class ContentDispositionHeader(ParameterizedMIMEHeader):
+
+    """Content-Disposition header exposing content_disposition."""
+
+    value_parser = staticmethod(parser.parse_content_disposition_header)
+
+    def init(self, *args, **kw):
+        super().init(*args, **kw)
+        disposition = self._parse_tree.content_disposition
+        # A missing disposition stays None; anything else is sanitized.
+        if disposition is not None:
+            disposition = utils._sanitize(disposition)
+        self._content_disposition = disposition
+
+    @property
+    def content_disposition(self):
+        """Sanitized disposition value, or None if the parse tree had none."""
+        return self._content_disposition
+
+
+class ContentTransferEncodingHeader:
+
+    """Mixin for the Content-Transfer-Encoding header exposing cte."""
+
+    max_count = 1
+
+    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)
+
+    @classmethod
+    def parse(cls, value, kwds):
+        """Parse value; store 'parse_tree' and 'decoded', extend 'defects'."""
+        tree = cls.value_parser(value)
+        kwds['parse_tree'] = tree
+        kwds['decoded'] = str(tree)
+        kwds['defects'].extend(tree.all_defects)
+
+    def init(self, *args, **kw):
+        # self._parse_tree is read only after the base init() has run.
+        super().init(*args, **kw)
+        raw_cte = self._parse_tree.cte
+        self._cte = utils._sanitize(raw_cte)
+
+    @property
+    def cte(self):
+        """Sanitized content transfer encoding taken from the parse tree."""
+        return self._cte
+
+
+# The header factory #
+
+# Default mapping of lower-cased header field names to the specialized
+# class that handles them.  HeaderRegistry copies this into its registry
+# when use_default_map is true; names not listed here fall back to the
+# registry's default_class.
+_default_header_map = {
+ 'subject': UniqueUnstructuredHeader,
+ 'date': UniqueDateHeader,
+ 'resent-date': DateHeader,
+ 'orig-date': UniqueDateHeader,
+ 'sender': UniqueSingleAddressHeader,
+ 'resent-sender': SingleAddressHeader,
+ 'to': UniqueAddressHeader,
+ 'resent-to': AddressHeader,
+ 'cc': UniqueAddressHeader,
+ 'resent-cc': AddressHeader,
+ 'bcc': UniqueAddressHeader,
+ 'resent-bcc': AddressHeader,
+ 'from': UniqueAddressHeader,
+ 'resent-from': AddressHeader,
+ 'reply-to': UniqueAddressHeader,
+ 'mime-version': MIMEVersionHeader,
+ 'content-type': ContentTypeHeader,
+ 'content-disposition': ContentDispositionHeader,
+ 'content-transfer-encoding': ContentTransferEncodingHeader,
+ }
+
+class HeaderRegistry:
+
+    """A header_factory and header registry."""
+
+    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
+                       use_default_map=True):
+        """Create a header_factory that works with the Policy API.
+
+        base_class is appended as the last base of every header class this
+        factory creates.  default_class handles any header name that has no
+        entry in the registry.  When use_default_map is true (the default),
+        the registry starts out as a copy of the module's default mapping of
+        names to specialized classes; otherwise it starts out empty.
+
+        """
+        self.base_class = base_class
+        self.default_class = default_class
+        self.registry = {}
+        if use_default_map:
+            self.registry.update(_default_header_map)
+
+    def map_to_type(self, name, cls):
+        """Register cls as the specialized class for handling "name" headers.
+
+        The name is stored lower-cased, so lookups are case-insensitive.
+
+        """
+        key = name.lower()
+        self.registry[key] = cls
+
+    def __getitem__(self, name):
+        # A new class is created on every lookup: the specialized class
+        # comes first, followed by the factory's base_class.
+        specialized = self.registry.get(name.lower(), self.default_class)
+        bases = (specialized, self.base_class)
+        return type('_'+specialized.__name__, bases, {})
+
+    def __call__(self, name, value):
+        """Create a header instance for header 'name' from 'value'.
+
+        The class is obtained by indexing this registry with name (which
+        combines the registered or default class with base_class), and is
+        then called with the name and the value.
+
+        """
+        header_class = self[name]
+        return header_class(name, value)
diff --git a/Lib/email/message.py b/Lib/email/message.py
index f1ffcdb..3feab52 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -10,14 +10,14 @@ import re
import uu
import base64
import binascii
-import warnings
from io import BytesIO, StringIO
# Intrapackage imports
from email import utils
from email import errors
-from email import header
+from email._policybase import compat32
from email import charset as _charset
+from email._encoded_words import decode_b
Charset = _charset.Charset
SEMISPACE = '; '
@@ -26,24 +26,6 @@ SEMISPACE = '; '
# existence of which force quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
-# How to figure out if we are processing strings that come from a byte
-# source with undecodable characters.
-_has_surrogates = re.compile(
- '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
-
-
-# Helper functions
-def _sanitize_header(name, value):
- # If the header value contains surrogates, return a Header using
- # the unknown-8bit charset to encode the bytes as encoded words.
- if not isinstance(value, str):
- # Assume it is already a header object
- return value
- if _has_surrogates(value):
- return header.Header(value, charset=_charset.UNKNOWN8BIT,
- header_name=name)
- else:
- return value
def _splitparam(param):
# Split header parameters. BAW: this may be too simple. It isn't
@@ -136,7 +118,8 @@ class Message:
you must use the explicit API to set or get all the headers. Not all of
the mapping methods are implemented.
"""
- def __init__(self):
+ def __init__(self, policy=compat32):
+ self.policy = policy
self._headers = []
self._unixfrom = None
self._payload = None
@@ -246,7 +229,7 @@ class Message:
cte = str(self.get('content-transfer-encoding', '')).lower()
# payload may be bytes here.
if isinstance(payload, str):
- if _has_surrogates(payload):
+ if utils._has_surrogates(payload):
bpayload = payload.encode('ascii', 'surrogateescape')
if not decode:
try:
@@ -267,11 +250,12 @@ class Message:
if cte == 'quoted-printable':
return utils._qdecode(bpayload)
elif cte == 'base64':
- try:
- return base64.b64decode(bpayload)
- except binascii.Error:
- # Incorrect padding
- return bpayload
+ # XXX: this is a bit of a hack; decode_b should probably be factored
+ # out somewhere, but I haven't figured out where yet.
+ value, defects = decode_b(b''.join(bpayload.splitlines()))
+ for defect in defects:
+ self.policy.handle_defect(self, defect)
+ return value
elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
in_file = BytesIO(bpayload)
out_file = BytesIO()
@@ -362,7 +346,17 @@ class Message:
Note: this does not overwrite an existing header with the same field
name. Use __delitem__() first to delete any existing headers.
"""
- self._headers.append((name, val))
+ max_count = self.policy.header_max_count(name)
+ if max_count:
+ lname = name.lower()
+ found = 0
+ for k, v in self._headers:
+ if k.lower() == lname:
+ found += 1
+ if found >= max_count:
+ raise ValueError("There may be at most {} {} headers "
+ "in a message".format(max_count, name))
+ self._headers.append(self.policy.header_store_parse(name, val))
def __delitem__(self, name):
"""Delete all occurrences of a header, if present.
@@ -401,7 +395,8 @@ class Message:
Any fields deleted and re-inserted are always appended to the header
list.
"""
- return [_sanitize_header(k, v) for k, v in self._headers]
+ return [self.policy.header_fetch_parse(k, v)
+ for k, v in self._headers]
def items(self):
"""Get all the message's header fields and values.
@@ -411,7 +406,8 @@ class Message:
Any fields deleted and re-inserted are always appended to the header
list.
"""
- return [(k, _sanitize_header(k, v)) for k, v in self._headers]
+ return [(k, self.policy.header_fetch_parse(k, v))
+ for k, v in self._headers]
def get(self, name, failobj=None):
"""Get a header value.
@@ -422,10 +418,29 @@ class Message:
name = name.lower()
for k, v in self._headers:
if k.lower() == name:
- return _sanitize_header(k, v)
+ return self.policy.header_fetch_parse(k, v)
return failobj
#
+ # "Internal" methods (public API, but only intended for use by a parser
+ # or generator, not normal application code).
+ #
+
+ def set_raw(self, name, value):
+     """Append the (name, value) pair to the header list exactly as given.
+
+     No policy hook is applied.  This is an "internal" API, intended only
+     for use by a parser.
+     """
+     pair = (name, value)
+     self._headers.append(pair)
+
+ def raw_items(self):
+     """Iterate over the stored (name, value) pairs without modification.
+
+     The iterator runs over a copy of the header list, so later changes to
+     the message do not affect it.  This is an "internal" API, intended
+     only for use by a generator.
+     """
+     snapshot = self._headers.copy()
+     return iter(snapshot)
+
+ #
# Additional useful stuff
#
@@ -442,7 +457,7 @@ class Message:
name = name.lower()
for k, v in self._headers:
if k.lower() == name:
- values.append(_sanitize_header(k, v))
+ values.append(self.policy.header_fetch_parse(k, v))
if not values:
return failobj
return values
@@ -475,7 +490,7 @@ class Message:
parts.append(_formatparam(k.replace('_', '-'), v))
if _value is not None:
parts.insert(0, _value)
- self._headers.append((_name, SEMISPACE.join(parts)))
+ self[_name] = SEMISPACE.join(parts)
def replace_header(self, _name, _value):
"""Replace a header.
@@ -487,7 +502,7 @@ class Message:
_name = _name.lower()
for i, (k, v) in zip(range(len(self._headers)), self._headers):
if k.lower() == _name:
- self._headers[i] = (k, _value)
+ self._headers[i] = self.policy.header_store_parse(k, _value)
break
else:
raise KeyError(_name)
@@ -803,7 +818,8 @@ class Message:
parts.append(k)
else:
parts.append('%s=%s' % (k, v))
- newheaders.append((h, SEMISPACE.join(parts)))
+ val = SEMISPACE.join(parts)
+ newheaders.append(self.policy.header_store_parse(h, val))
else:
newheaders.append((h, v))
diff --git a/Lib/email/mime/text.py b/Lib/email/mime/text.py
index 5747db5..80ff950 100644
--- a/Lib/email/mime/text.py
+++ b/Lib/email/mime/text.py
@@ -14,7 +14,7 @@ from email.mime.nonmultipart import MIMENonMultipart
class MIMEText(MIMENonMultipart):
"""Class for generating text/* type MIME documents."""
- def __init__(self, _text, _subtype='plain', _charset='us-ascii'):
+ def __init__(self, _text, _subtype='plain', _charset=None):
"""Create a text/* type MIME document.
_text is the string for this message object.
@@ -25,6 +25,18 @@ class MIMEText(MIMENonMultipart):
header. This defaults to "us-ascii". Note that as a side-effect, the
Content-Transfer-Encoding header will also be set.
"""
+
+ # If no _charset was specified, check to see if there are non-ascii
+ # characters present. If not, use 'us-ascii', otherwise use utf-8.
+ # XXX: This can be removed once #7304 is fixed.
+ if _charset is None:
+ try:
+ _text.encode('us-ascii')
+ _charset = 'us-ascii'
+ except UnicodeEncodeError:
+ _charset = 'utf-8'
+
MIMENonMultipart.__init__(self, 'text', _subtype,
**{'charset': _charset})
+
self.set_payload(_text, _charset)
diff --git a/Lib/email/parser.py b/Lib/email/parser.py
index 1c931ea..1aab012 100644
--- a/Lib/email/parser.py
+++ b/Lib/email/parser.py
@@ -4,18 +4,19 @@
"""A parser of RFC 2822 and MIME email messages."""
-__all__ = ['Parser', 'HeaderParser', 'BytesParser']
+__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
import warnings
from io import StringIO, TextIOWrapper
from email.feedparser import FeedParser
from email.message import Message
+from email._policybase import compat32
class Parser:
- def __init__(self, *args, **kws):
+ def __init__(self, _class=Message, *, policy=compat32):
"""Parser of RFC 2822 and MIME email messages.
Creates an in-memory object tree representing the email message, which
@@ -30,28 +31,14 @@ class Parser:
_class is the class to instantiate for new message objects when they
must be created. This class must have a constructor that can take
zero arguments. Default is Message.Message.
+
+ The policy keyword specifies a policy object that controls a number of
+ aspects of the parser's operation. The default policy maintains
+ backward compatibility.
+
"""
- if len(args) >= 1:
- if '_class' in kws:
- raise TypeError("Multiple values for keyword arg '_class'")
- kws['_class'] = args[0]
- if len(args) == 2:
- if 'strict' in kws:
- raise TypeError("Multiple values for keyword arg 'strict'")
- kws['strict'] = args[1]
- if len(args) > 2:
- raise TypeError('Too many arguments')
- if '_class' in kws:
- self._class = kws['_class']
- del kws['_class']
- else:
- self._class = Message
- if 'strict' in kws:
- warnings.warn("'strict' argument is deprecated (and ignored)",
- DeprecationWarning, 2)
- del kws['strict']
- if kws:
- raise TypeError('Unexpected keyword arguments')
+ self._class = _class
+ self.policy = policy
def parse(self, fp, headersonly=False):
"""Create a message structure from the data in a file.
@@ -61,7 +48,7 @@ class Parser:
parsing after reading the headers or not. The default is False,
meaning it parses the entire contents of the file.
"""
- feedparser = FeedParser(self._class)
+ feedparser = FeedParser(self._class, policy=self.policy)
if headersonly:
feedparser._set_headersonly()
while True:
@@ -134,3 +121,11 @@ class BytesParser:
"""
text = text.decode('ASCII', errors='surrogateescape')
return self.parser.parsestr(text, headersonly)
+
+
+class BytesHeaderParser(BytesParser):
+ # Variant of BytesParser that always parses headers only.  The
+ # headersonly argument of both methods is accepted but ignored: True is
+ # always passed on to BytesParser.
+ def parse(self, fp, headersonly=True):
+ return BytesParser.parse(self, fp, headersonly=True)
+
+ def parsebytes(self, text, headersonly=True):
+ return BytesParser.parsebytes(self, text, headersonly=True)
diff --git a/Lib/email/policy.py b/Lib/email/policy.py
new file mode 100644
index 0000000..32cad0d
--- /dev/null
+++ b/Lib/email/policy.py
@@ -0,0 +1,188 @@
+"""This will be the home for the policy that hooks in the new
+code that adds all the email6 features.
+"""
+
+from email._policybase import Policy, Compat32, compat32, _extend_docstrings
+from email.utils import _has_surrogates
+from email.headerregistry import HeaderRegistry as HeaderRegistry
+
+__all__ = [
+ 'Compat32',
+ 'compat32',
+ 'Policy',
+ 'EmailPolicy',
+ 'default',
+ 'strict',
+ 'SMTP',
+ 'HTTP',
+ ]
+
+@_extend_docstrings
+class EmailPolicy(Policy):
+
+    """+
+    PROVISIONAL
+
+    The API extensions enabled by this policy are currently provisional.
+    Refer to the documentation for details.
+
+    This policy adds new header parsing and folding algorithms. Instead of
+    simple strings, headers are custom objects with custom attributes
+    depending on the type of the field. The folding algorithm fully
+    implements RFCs 2047 and 5322.
+
+    In addition to the settable attributes listed above that apply to
+    all Policies, this policy adds the following additional attributes:
+
+    refold_source       -- if the value for a header in the Message object
+                           came from the parsing of some source, this attribute
+                           indicates whether or not a generator should refold
+                           that value when transforming the message back into
+                           stream form. The possible values are:
+
+                           none  -- all source values use original folding
+                           long  -- source values that have any line that is
+                                    longer than max_line_length will be
+                                    refolded
+                           all   -- all values are refolded.
+
+                           The default is 'long'.
+
+    header_factory      -- a callable that takes two arguments, 'name' and
+                           'value', where 'name' is a header field name and
+                           'value' is an unfolded header field value, and
+                           returns a string-like object that represents that
+                           header. A default header_factory is provided that
+                           understands some of the RFC5322 header field types.
+                           (Currently address fields and date fields have
+                           special treatment, while all other fields are
+                           treated as unstructured. This list will be
+                           completed before the extension is marked stable.)
+    """
+
+    refold_source = 'long'
+    header_factory = HeaderRegistry()
+
+    def __init__(self, **kw):
+        # Ensure that each new instance gets a unique header factory
+        # (as opposed to clones, which share the factory).
+        if 'header_factory' not in kw:
+            object.__setattr__(self, 'header_factory', HeaderRegistry())
+        super().__init__(**kw)
+
+    def header_max_count(self, name):
+        """+
+        The implementation for this class returns the max_count attribute from
+        the specialized header class that would be used to construct a header
+        of type 'name'.
+        """
+        return self.header_factory[name].max_count
+
+    # The logic of the next three methods is chosen such that it is possible to
+    # switch a Message object between a Compat32 policy and a policy derived
+    # from this class and have the results stay consistent. This allows a
+    # Message object constructed with this policy to be passed to a library
+    # that only handles Compat32 objects, or to receive such an object and
+    # convert it to use the newer style by just changing its policy. It is
+    # also chosen because it postpones the relatively expensive full rfc5322
+    # parse until as late as possible when parsing from source, since in many
+    # applications only a few headers will actually be inspected.
+
+    def header_source_parse(self, sourcelines):
+        """+
+        The name is parsed as everything up to the ':' and returned unmodified.
+        The value is determined by stripping leading whitespace off the
+        remainder of the first line, joining all subsequent lines together, and
+        stripping any trailing carriage return or linefeed characters. (This
+        is the same as Compat32).
+
+        """
+        name, value = sourcelines[0].split(':', 1)
+        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+        return (name, value.rstrip('\r\n'))
+
+    def header_store_parse(self, name, value):
+        """+
+        The name is returned unchanged. If the input value has a 'name'
+        attribute and it matches the name ignoring case, the value is returned
+        unchanged. Otherwise the name and value are passed to header_factory
+        method, and the resulting custom header object is returned as the
+        value. In this case a ValueError is raised if the input value contains
+        CR or LF characters.
+
+        """
+        if hasattr(value, 'name') and value.name.lower() == name.lower():
+            return (name, value)
+        if isinstance(value, str) and len(value.splitlines())>1:
+            raise ValueError("Header values may not contain linefeed "
+                             "or carriage return characters")
+        return (name, self.header_factory(name, value))
+
+    def header_fetch_parse(self, name, value):
+        """+
+        If the value has a 'name' attribute, it is returned unmodified.
+        Otherwise the name and the value with any linesep characters removed
+        are passed to the header_factory method, and the resulting custom
+        header object is returned. Any surrogateescaped bytes get turned
+        into the unicode unknown-character glyph.
+
+        """
+        if hasattr(value, 'name'):
+            return value
+        return self.header_factory(name, ''.join(value.splitlines()))
+
+    def fold(self, name, value):
+        """+
+        Header folding is controlled by the refold_source policy setting. A
+        value is considered to be a 'source value' if and only if it does not
+        have a 'name' attribute (having a 'name' attribute means it is a header
+        object of some sort). If a source value needs to be refolded according
+        to the policy, it is converted into a custom header object by passing
+        the name and the value with any linesep characters removed to the
+        header_factory method. Folding of a custom header object is done by
+        calling its fold method with the current policy.
+
+        Source values are split into lines using splitlines. If the value is
+        not to be refolded, the lines are rejoined using the linesep from the
+        policy and returned. The exception is lines containing non-ascii
+        binary data. In that case the value is refolded regardless of the
+        refold_source setting, which causes the binary data to be CTE encoded
+        using the unknown-8bit charset.
+
+        """
+        return self._fold(name, value, refold_binary=True)
+
+    def fold_binary(self, name, value):
+        """+
+        The same as fold if cte_type is 7bit, except that the returned value is
+        bytes.
+
+        If cte_type is 8bit, non-ASCII binary data is converted back into
+        bytes. Headers with binary data are not refolded, regardless of the
+        refold_source setting, since there is no way to know whether the binary
+        data consists of single byte characters or multibyte characters.
+
+        """
+        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
+        return folded.encode('ascii', 'surrogateescape')
+
+    def _fold(self, name, value, refold_binary=False):
+        # Shared implementation of fold() and fold_binary().  Header objects
+        # (anything with a 'name' attribute) fold themselves; plain source
+        # strings are refolded only when the policy asks for it.
+        if hasattr(value, 'name'):
+            return value.fold(policy=self)
+        # A false max_line_length means "no limit".
+        maxlen = self.max_line_length if self.max_line_length else float('inf')
+        lines = value.splitlines()
+        # An empty value splits into no lines at all, so 'lines' must be
+        # checked before looking at lines[0]; otherwise folding an empty
+        # header raises IndexError.  The first line shares its physical line
+        # with "name: ", hence the extra len(name)+2.
+        refold = (self.refold_source == 'all' or
+                  self.refold_source == 'long' and
+                    (bool(lines) and len(lines[0])+len(name)+2 > maxlen or
+                     any(len(x) > maxlen for x in lines[1:])))
+        if refold or refold_binary and _has_surrogates(value):
+            return self.header_factory(name, ''.join(lines)).fold(policy=self)
+        return name + ': ' + self.linesep.join(lines) + self.linesep
+
+
+# Ready-made policy instances.  The others are clones of 'default' that
+# differ only in the keyword arguments passed to clone().
+default = EmailPolicy()
+# Make the default policy use the class default header_factory
+del default.header_factory
+# Same as default, but with raise_on_defect enabled.
+strict = default.clone(raise_on_defect=True)
+# Same as default, but with CRLF line separators.
+SMTP = default.clone(linesep='\r\n')
+# CRLF line separators and no maximum line length.
+HTTP = default.clone(linesep='\r\n', max_line_length=None)
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index ac4da37..6b6d7f4 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -11,12 +11,14 @@ __all__ = [
'encode_rfc2231',
'formataddr',
'formatdate',
+ 'format_datetime',
'getaddresses',
'make_msgid',
'mktime_tz',
'parseaddr',
'parsedate',
'parsedate_tz',
+ 'parsedate_to_datetime',
'unquote',
]
@@ -26,6 +28,7 @@ import time
import base64
import random
import socket
+import datetime
import urllib.parse
import warnings
from io import StringIO
@@ -34,14 +37,13 @@ from email._parseaddr import quote
from email._parseaddr import AddressList as _AddressList
from email._parseaddr import mktime_tz
-# We need wormarounds for bugs in these methods in older Pythons (see below)
-from email._parseaddr import parsedate as _parsedate
-from email._parseaddr import parsedate_tz as _parsedate_tz
+from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
from quopri import decodestring as _qdecode
# Intrapackage imports
from email.encoders import _bencode, _qencode
+from email.charset import Charset
COMMASPACE = ', '
EMPTYSTRING = ''
@@ -50,27 +52,53 @@ CRLF = '\r\n'
TICK = "'"
specialsre = re.compile(r'[][\\()<>@,:;".]')
-escapesre = re.compile(r'[][\\()"]')
+escapesre = re.compile(r'[\\"]')
+# How to figure out if we are processing strings that come from a byte
+# source with undecodable characters.
+_has_surrogates = re.compile(
+ '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
+
+# How to deal with a string containing bytes before handing it to the
+# application through the 'normal' interface.
+def _sanitize(string):
+    """Return string with surrogate-escaped bytes replaced by U+FFFD.
+
+    The text is encoded back to bytes with the 'surrogateescape' handler
+    and decoded again as ASCII with the 'replace' handler, so every byte
+    that is not ASCII becomes the unicode 'unknown' character.
+    """
+    return string.encode('ascii', 'surrogateescape').decode('ascii', 'replace')
# Helpers
-def formataddr(pair):
+def formataddr(pair, charset='utf-8'):
"""The inverse of parseaddr(), this takes a 2-tuple of the form
(realname, email_address) and returns the string value suitable
for an RFC 2822 From, To or Cc header.
If the first element of pair is false, then the second element is
returned unmodified.
+
+ Optional charset if given is the character set that is used to encode
+ realname in case realname is not ASCII safe. Can be an instance of str or
+ a Charset-like object which has a header_encode method. Default is
+ 'utf-8'.
"""
name, address = pair
+ # The address MUST (per RFC) be ascii, so throw a UnicodeError if it isn't.
+ address.encode('ascii')
if name:
- quotes = ''
- if specialsre.search(name):
- quotes = '"'
- name = escapesre.sub(r'\\\g<0>', name)
- return '%s%s%s <%s>' % (quotes, name, quotes, address)
+ try:
+ name.encode('ascii')
+ except UnicodeEncodeError:
+ if isinstance(charset, str):
+ charset = Charset(charset)
+ encoded_name = charset.header_encode(name)
+ return "%s <%s>" % (encoded_name, address)
+ else:
+ quotes = ''
+ if specialsre.search(name):
+ quotes = '"'
+ name = escapesre.sub(r'\\\g<0>', name)
+ return '%s%s%s <%s>' % (quotes, name, quotes, address)
return address
@@ -94,6 +122,14 @@ ecre = re.compile(r'''
''', re.VERBOSE | re.IGNORECASE)
+def _format_timetuple_and_zone(timetuple, zone):
+    """Format a time tuple and a ready-made zone string as an RFC 2822 date.
+
+    timetuple is indexed like a time.struct_time: year, month, day, hour,
+    minute, second at positions 0-5 and the weekday (Monday is 0) at
+    position 6.  zone is appended verbatim.
+    """
+    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
+    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
+    day_name = day_names[timetuple[6]]
+    day_of_month = timetuple[2]
+    month_name = month_names[timetuple[1] - 1]
+    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
+        day_name, day_of_month, month_name,
+        timetuple[0], timetuple[3], timetuple[4], timetuple[5],
+        zone)
def formatdate(timeval=None, localtime=False, usegmt=False):
"""Returns a date string as specified by RFC 2822, e.g.:
@@ -138,14 +174,25 @@ def formatdate(timeval=None, localtime=False, usegmt=False):
zone = 'GMT'
else:
zone = '-0000'
- return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
- ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
- now[2],
- ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
- 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
- now[0], now[3], now[4], now[5],
- zone)
+ return _format_timetuple_and_zone(now, zone)
+def format_datetime(dt, usegmt=False):
+    """Turn a datetime into a date string as specified in RFC 2822.
+
+    If usegmt is True, dt must be an aware datetime whose tzinfo equals
+    datetime.timezone.utc; 'GMT' is then rendered instead of a numeric
+    offset, and ValueError is raised for any other datetime.  Otherwise a
+    naive datetime is rendered with '-0000' and an aware one with its
+    strftime("%z") offset.
+    """
+    timetuple = dt.timetuple()
+    if not usegmt:
+        zone = '-0000' if dt.tzinfo is None else dt.strftime("%z")
+        return _format_timetuple_and_zone(timetuple, zone)
+    if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc:
+        raise ValueError("usegmt option requires a UTC datetime")
+    return _format_timetuple_and_zone(timetuple, 'GMT')
def make_msgid(idstring=None, domain=None):
@@ -172,20 +219,12 @@ def make_msgid(idstring=None, domain=None):
return msgid
-
-# These functions are in the standalone mimelib version only because they've
-# subsequently been fixed in the latest Python versions. We use this to worm
-# around broken older Pythons.
-def parsedate(data):
- if not data:
- return None
- return _parsedate(data)
-
-
-def parsedate_tz(data):
- if not data:
- return None
- return _parsedate_tz(data)
+def parsedate_to_datetime(data):
+    """Convert a date string into a datetime using _parsedate_tz.
+
+    The last element of the parsed result is the zone offset in seconds.
+    When it is None a naive datetime is returned; otherwise the datetime
+    carries a fixed-offset datetime.timezone.
+
+    NOTE(review): the result of _parsedate_tz is unpacked directly, so a
+    None result (presumably returned for unparseable input -- confirm)
+    surfaces as a TypeError.
+    """
+    *fields, offset = _parsedate_tz(data)
+    if offset is None:
+        zone = None
+    else:
+        zone = datetime.timezone(datetime.timedelta(seconds=offset))
+    return datetime.datetime(*fields[:6], tzinfo=zone)
def parseaddr(addr):
@@ -304,3 +343,49 @@ def collapse_rfc2231_value(value, errors='replace',
except LookupError:
# charset is not a known codec.
return unquote(text)
+
+
+#
+# datetime doesn't provide a localtime function yet, so provide one. Code
+# adapted from the patch in issue 9527. This may not be perfect, but it is
+# better than not having it.
+#
+
+def localtime(dt=None, isdst=-1):
+ """Return local time as an aware datetime object.
+
+ If called without arguments, return current time. Otherwise *dt*
+ argument should be a datetime instance, and it is converted to the
+ local time zone according to the system time zone database. If *dt* is
+ naive (that is, dt.tzinfo is None), it is assumed to be in local time.
+ In this case, a positive or zero value for *isdst* causes localtime to
+ presume initially that summer time (for example, Daylight Saving Time)
+ is or is not (respectively) in effect for the specified time. A
+ negative value for *isdst* causes the localtime() function to attempt
+ to divine whether summer time is in effect for the specified time.
+
+ """
+ # No argument: take the current UTC time and convert it to local time.
+ if dt is None:
+ return datetime.datetime.now(datetime.timezone.utc).astimezone()
+ # Already aware: a plain conversion is enough; isdst is not consulted.
+ if dt.tzinfo is not None:
+ return dt.astimezone()
+ # We have a naive datetime. Convert to a (localtime) timetuple and pass to
+ # system mktime together with the isdst hint. System mktime will return
+ # seconds since epoch.
+ tm = dt.timetuple()[:-1] + (isdst,)
+ seconds = time.mktime(tm)
+ localtm = time.localtime(seconds)
+ try:
+ # Preferred path: use the offset and zone name carried by localtm.
+ delta = datetime.timedelta(seconds=localtm.tm_gmtoff)
+ tz = datetime.timezone(delta, localtm.tm_zone)
+ except AttributeError:
+ # NOTE(review): presumably reached when struct_time has no
+ # tm_gmtoff/tm_zone attributes on this platform -- confirm.
+ # Compute UTC offset and compare with the value implied by tm_isdst.
+ # If the values match, use the zone name implied by tm_isdst.
+ delta = dt - datetime.datetime(*time.gmtime(seconds)[:6])
+ dst = time.daylight and localtm.tm_isdst > 0
+ gmtoff = -(time.altzone if dst else time.timezone)
+ if delta == datetime.timedelta(seconds=gmtoff):
+ tz = datetime.timezone(delta, time.tzname[dst])
+ else:
+ tz = datetime.timezone(delta)
+ # Attach the computed zone without changing the wall-clock fields.
+ return dt.replace(tzinfo=tz)
diff --git a/Lib/encodings/cp037.py b/Lib/encodings/cp037.py
index 4edd708..bfe2c1e 100644
--- a/Lib/encodings/cp037.py
+++ b/Lib/encodings/cp037.py
@@ -301,6 +301,7 @@ decoding_table = (
'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
'\x9f' # 0xFF -> CONTROL
+ '\ufffe' ## Widen to UCS2 for optimization
)
### Encoding table
diff --git a/Lib/encodings/cp500.py b/Lib/encodings/cp500.py
index 5f61535..a975be7 100644
--- a/Lib/encodings/cp500.py
+++ b/Lib/encodings/cp500.py
@@ -301,6 +301,7 @@ decoding_table = (
'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
'\x9f' # 0xFF -> CONTROL
+ '\ufffe' ## Widen to UCS2 for optimization
)
### Encoding table
diff --git a/Lib/encodings/cp65001.py b/Lib/encodings/cp65001.py
new file mode 100644
index 0000000..287eb87
--- /dev/null
+++ b/Lib/encodings/cp65001.py
@@ -0,0 +1,40 @@
+"""
+Code page 65001: Windows UTF-8 (CP_UTF8).
+"""
+
+import codecs
+import functools
+
+# Importing this module fails with LookupError when the codecs module has
+# no code_page_encode function.
+if not hasattr(codecs, 'code_page_encode'):
+ raise LookupError("cp65001 encoding is only available on Windows")
+
+### Codec APIs
+
+# Both codec functions are the generic code-page functions with the code
+# page number 65001 bound as first argument.
+encode = functools.partial(codecs.code_page_encode, 65001)
+decode = functools.partial(codecs.code_page_decode, 65001)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    """Incremental encoder built on the module-level encode()."""
+
+    def encode(self, input, final=False):
+        # encode() returns a sequence whose first item is the encoded
+        # data; 'final' is not used.
+        result = encode(input, self.errors)
+        return result[0]
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+ # The buffered base class decodes through _buffer_decode; here that is
+ # the module-level decode partial.
+ _buffer_decode = decode
+
+class StreamWriter(codecs.StreamWriter):
+ # Stream writer whose encode attribute is the module-level encode partial.
+ encode = encode
+
+class StreamReader(codecs.StreamReader):
+ # Stream reader whose decode attribute is the module-level decode partial.
+ decode = decode
+
+### encodings module API
+
+def getregentry():
+    """Return the codecs.CodecInfo describing the cp65001 codec."""
+    entry = dict(
+        name='cp65001',
+        encode=encode,
+        decode=decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
+    return codecs.CodecInfo(**entry)
diff --git a/Lib/encodings/hp_roman8.py b/Lib/encodings/hp_roman8.py
index dbaaa72..2334208 100644
--- a/Lib/encodings/hp_roman8.py
+++ b/Lib/encodings/hp_roman8.py
@@ -14,18 +14,18 @@ import codecs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
- return codecs.charmap_encode(input,errors,encoding_map)
+ return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
- return codecs.charmap_decode(input,errors,decoding_map)
+ return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
- return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
- return codecs.charmap_decode(input,self.errors,decoding_map)[0]
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
@@ -46,107 +46,267 @@ def getregentry():
streamreader=StreamReader,
)
-### Decoding Map
-
-decoding_map = codecs.make_identity_dict(range(256))
-decoding_map.update({
- 0x00a1: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
- 0x00a2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
- 0x00a3: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
- 0x00a4: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
- 0x00a5: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
- 0x00a6: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
- 0x00a7: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
- 0x00a8: 0x00b4, # ACUTE ACCENT
- 0x00a9: 0x02cb, # MODIFIER LETTER GRAVE ACCENT (Mandarin Chinese fourth tone)
- 0x00aa: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
- 0x00ab: 0x00a8, # DIAERESIS
- 0x00ac: 0x02dc, # SMALL TILDE
- 0x00ad: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
- 0x00ae: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
- 0x00af: 0x20a4, # LIRA SIGN
- 0x00b0: 0x00af, # MACRON
- 0x00b1: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
- 0x00b2: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
- 0x00b3: 0x00b0, # DEGREE SIGN
- 0x00b4: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
- 0x00b5: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
- 0x00b6: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
- 0x00b7: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
- 0x00b8: 0x00a1, # INVERTED EXCLAMATION MARK
- 0x00b9: 0x00bf, # INVERTED QUESTION MARK
- 0x00ba: 0x00a4, # CURRENCY SIGN
- 0x00bb: 0x00a3, # POUND SIGN
- 0x00bc: 0x00a5, # YEN SIGN
- 0x00bd: 0x00a7, # SECTION SIGN
- 0x00be: 0x0192, # LATIN SMALL LETTER F WITH HOOK
- 0x00bf: 0x00a2, # CENT SIGN
- 0x00c0: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
- 0x00c1: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
- 0x00c2: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
- 0x00c3: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
- 0x00c4: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
- 0x00c5: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
- 0x00c6: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
- 0x00c7: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
- 0x00c8: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
- 0x00c9: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
- 0x00ca: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
- 0x00cb: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
- 0x00cc: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
- 0x00cd: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
- 0x00ce: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
- 0x00cf: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
- 0x00d0: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
- 0x00d1: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
- 0x00d2: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
- 0x00d3: 0x00c6, # LATIN CAPITAL LETTER AE
- 0x00d4: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
- 0x00d5: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
- 0x00d6: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
- 0x00d7: 0x00e6, # LATIN SMALL LETTER AE
- 0x00d8: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
- 0x00d9: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
- 0x00da: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
- 0x00db: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
- 0x00dc: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
- 0x00dd: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
- 0x00de: 0x00df, # LATIN SMALL LETTER SHARP S (German)
- 0x00df: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
- 0x00e0: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
- 0x00e1: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
- 0x00e2: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
- 0x00e3: 0x00d0, # LATIN CAPITAL LETTER ETH (Icelandic)
- 0x00e4: 0x00f0, # LATIN SMALL LETTER ETH (Icelandic)
- 0x00e5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
- 0x00e6: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
- 0x00e7: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
- 0x00e8: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
- 0x00e9: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
- 0x00ea: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
- 0x00eb: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
- 0x00ec: 0x0161, # LATIN SMALL LETTER S WITH CARON
- 0x00ed: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
- 0x00ee: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
- 0x00ef: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
- 0x00f0: 0x00de, # LATIN CAPITAL LETTER THORN (Icelandic)
- 0x00f1: 0x00fe, # LATIN SMALL LETTER THORN (Icelandic)
- 0x00f2: 0x00b7, # MIDDLE DOT
- 0x00f3: 0x00b5, # MICRO SIGN
- 0x00f4: 0x00b6, # PILCROW SIGN
- 0x00f5: 0x00be, # VULGAR FRACTION THREE QUARTERS
- 0x00f6: 0x2014, # EM DASH
- 0x00f7: 0x00bc, # VULGAR FRACTION ONE QUARTER
- 0x00f8: 0x00bd, # VULGAR FRACTION ONE HALF
- 0x00f9: 0x00aa, # FEMININE ORDINAL INDICATOR
- 0x00fa: 0x00ba, # MASCULINE ORDINAL INDICATOR
- 0x00fb: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
- 0x00fc: 0x25a0, # BLACK SQUARE
- 0x00fd: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
- 0x00fe: 0x00b1, # PLUS-MINUS SIGN
- 0x00ff: None,
-})
-
-### Encoding Map
-
-encoding_map = codecs.make_encoding_map(decoding_map)
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\x80' # 0x80 -> <control>
+ '\x81' # 0x81 -> <control>
+ '\x82' # 0x82 -> <control>
+ '\x83' # 0x83 -> <control>
+ '\x84' # 0x84 -> <control>
+ '\x85' # 0x85 -> <control>
+ '\x86' # 0x86 -> <control>
+ '\x87' # 0x87 -> <control>
+ '\x88' # 0x88 -> <control>
+ '\x89' # 0x89 -> <control>
+ '\x8a' # 0x8A -> <control>
+ '\x8b' # 0x8B -> <control>
+ '\x8c' # 0x8C -> <control>
+ '\x8d' # 0x8D -> <control>
+ '\x8e' # 0x8E -> <control>
+ '\x8f' # 0x8F -> <control>
+ '\x90' # 0x90 -> <control>
+ '\x91' # 0x91 -> <control>
+ '\x92' # 0x92 -> <control>
+ '\x93' # 0x93 -> <control>
+ '\x94' # 0x94 -> <control>
+ '\x95' # 0x95 -> <control>
+ '\x96' # 0x96 -> <control>
+ '\x97' # 0x97 -> <control>
+ '\x98' # 0x98 -> <control>
+ '\x99' # 0x99 -> <control>
+ '\x9a' # 0x9A -> <control>
+ '\x9b' # 0x9B -> <control>
+ '\x9c' # 0x9C -> <control>
+ '\x9d' # 0x9D -> <control>
+ '\x9e' # 0x9E -> <control>
+ '\x9f' # 0x9F -> <control>
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\xc0' # 0xA1 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc2' # 0xA2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc8' # 0xA3 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xca' # 0xA4 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0xA5 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xce' # 0xA6 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0xA7 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\xb4' # 0xA8 -> ACUTE ACCENT
+ '\u02cb' # 0xA9 -> MODIFIER LETTER GRAVE ACCENT (MANDARIN CHINESE FOURTH TONE)
+ '\u02c6' # 0xAA -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ '\xa8' # 0xAB -> DIAERESIS
+ '\u02dc' # 0xAC -> SMALL TILDE
+ '\xd9' # 0xAD -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xdb' # 0xAE -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\u20a4' # 0xAF -> LIRA SIGN
+ '\xaf' # 0xB0 -> MACRON
+ '\xdd' # 0xB1 -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xfd' # 0xB2 -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xb0' # 0xB3 -> DEGREE SIGN
+ '\xc7' # 0xB4 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xe7' # 0xB5 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xd1' # 0xB6 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xf1' # 0xB7 -> LATIN SMALL LETTER N WITH TILDE
+ '\xa1' # 0xB8 -> INVERTED EXCLAMATION MARK
+ '\xbf' # 0xB9 -> INVERTED QUESTION MARK
+ '\xa4' # 0xBA -> CURRENCY SIGN
+ '\xa3' # 0xBB -> POUND SIGN
+ '\xa5' # 0xBC -> YEN SIGN
+ '\xa7' # 0xBD -> SECTION SIGN
+ '\u0192' # 0xBE -> LATIN SMALL LETTER F WITH HOOK
+ '\xa2' # 0xBF -> CENT SIGN
+ '\xe2' # 0xC0 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xea' # 0xC1 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xf4' # 0xC2 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xfb' # 0xC3 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xe1' # 0xC4 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe9' # 0xC5 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xf3' # 0xC6 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0xC7 -> LATIN SMALL LETTER U WITH ACUTE
+ '\xe0' # 0xC8 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe8' # 0xC9 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xf2' # 0xCA -> LATIN SMALL LETTER O WITH GRAVE
+ '\xf9' # 0xCB -> LATIN SMALL LETTER U WITH GRAVE
+ '\xe4' # 0xCC -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xeb' # 0xCD -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xf6' # 0xCE -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xfc' # 0xCF -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xc5' # 0xD0 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xee' # 0xD1 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xd8' # 0xD2 -> LATIN CAPITAL LETTER O WITH STROKE
+ '\xc6' # 0xD3 -> LATIN CAPITAL LETTER AE
+ '\xe5' # 0xD4 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xed' # 0xD5 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xf8' # 0xD6 -> LATIN SMALL LETTER O WITH STROKE
+ '\xe6' # 0xD7 -> LATIN SMALL LETTER AE
+ '\xc4' # 0xD8 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xec' # 0xD9 -> LATIN SMALL LETTER I WITH GRAVE
+ '\xd6' # 0xDA -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xdc' # 0xDB -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xc9' # 0xDC -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xef' # 0xDD -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xdf' # 0xDE -> LATIN SMALL LETTER SHARP S (GERMAN)
+ '\xd4' # 0xDF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xc1' # 0xE0 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc3' # 0xE1 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xe3' # 0xE2 -> LATIN SMALL LETTER A WITH TILDE
+ '\xd0' # 0xE3 -> LATIN CAPITAL LETTER ETH (ICELANDIC)
+ '\xf0' # 0xE4 -> LATIN SMALL LETTER ETH (ICELANDIC)
+ '\xcd' # 0xE5 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xcc' # 0xE6 -> LATIN CAPITAL LETTER I WITH GRAVE
+ '\xd3' # 0xE7 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd2' # 0xE8 -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xd5' # 0xE9 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xf5' # 0xEA -> LATIN SMALL LETTER O WITH TILDE
+ '\u0160' # 0xEB -> LATIN CAPITAL LETTER S WITH CARON
+ '\u0161' # 0xEC -> LATIN SMALL LETTER S WITH CARON
+ '\xda' # 0xED -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\u0178' # 0xEE -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+ '\xff' # 0xEF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\xde' # 0xF0 -> LATIN CAPITAL LETTER THORN (ICELANDIC)
+ '\xfe' # 0xF1 -> LATIN SMALL LETTER THORN (ICELANDIC)
+ '\xb7' # 0xF2 -> MIDDLE DOT
+ '\xb5' # 0xF3 -> MICRO SIGN
+ '\xb6' # 0xF4 -> PILCROW SIGN
+ '\xbe' # 0xF5 -> VULGAR FRACTION THREE QUARTERS
+ '\u2014' # 0xF6 -> EM DASH
+ '\xbc' # 0xF7 -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xF8 -> VULGAR FRACTION ONE HALF
+ '\xaa' # 0xF9 -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0xFA -> MASCULINE ORDINAL INDICATOR
+ '\xab' # 0xFB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u25a0' # 0xFC -> BLACK SQUARE
+ '\xbb' # 0xFD -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xb1' # 0xFE -> PLUS-MINUS SIGN
+ '\ufffe'
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index 583bdf1..ea40585 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -153,6 +153,20 @@ class Codec(codecs.Codec):
if not input:
return b'', 0
+ try:
+ result = input.encode('ascii')
+ except UnicodeEncodeError:
+ pass
+ else:
+ # ASCII name: fast path
+ labels = result.split(b'.')
+ for label in labels[:-1]:
+ if not (0 < len(label) < 64):
+ raise UnicodeError("label empty or too long")
+ if len(labels[-1]) >= 64:
+ raise UnicodeError("label too long")
+ return result, len(input)
+
result = bytearray()
labels = dots.split(input)
if labels and not labels[-1]:
@@ -179,6 +193,14 @@ class Codec(codecs.Codec):
if not isinstance(input, bytes):
# XXX obviously wrong, see #3232
input = bytes(input)
+
+ if ace_prefix not in input:
+ # Fast path
+ try:
+ return input.decode('ascii'), len(input)
+ except UnicodeDecodeError:
+ pass
+
labels = input.split(b".")
if labels and len(labels[-1]) == 0:
diff --git a/Lib/encodings/iso8859_1.py b/Lib/encodings/iso8859_1.py
index 8cfc01f..d9cc516 100644
--- a/Lib/encodings/iso8859_1.py
+++ b/Lib/encodings/iso8859_1.py
@@ -301,6 +301,7 @@ decoding_table = (
'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic)
'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\ufffe' ## Widen to UCS2 for optimization
)
### Encoding table
diff --git a/Lib/encodings/mac_latin2.py b/Lib/encodings/mac_latin2.py
index e322be2..da9d4b1 100644
--- a/Lib/encodings/mac_latin2.py
+++ b/Lib/encodings/mac_latin2.py
@@ -1,4 +1,4 @@
-""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py.
+""" Python Character Mapping Codec mac_latin2 generated from 'MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
@@ -14,18 +14,18 @@ import codecs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
- return codecs.charmap_encode(input,errors,encoding_map)
+ return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
- return codecs.charmap_decode(input,errors,decoding_map)
+ return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
- return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
- return codecs.charmap_decode(input,self.errors,decoding_map)[0]
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
@@ -46,138 +46,267 @@ def getregentry():
streamwriter=StreamWriter,
)
-### Decoding Map
-
-decoding_map = codecs.make_identity_dict(range(256))
-decoding_map.update({
- 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
- 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
- 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON
- 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
- 0x0084: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
- 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
- 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
- 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
- 0x0088: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
- 0x0089: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
- 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
- 0x008b: 0x010d, # LATIN SMALL LETTER C WITH CARON
- 0x008c: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
- 0x008d: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
- 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
- 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
- 0x0090: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
- 0x0091: 0x010e, # LATIN CAPITAL LETTER D WITH CARON
- 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
- 0x0093: 0x010f, # LATIN SMALL LETTER D WITH CARON
- 0x0094: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
- 0x0095: 0x0113, # LATIN SMALL LETTER E WITH MACRON
- 0x0096: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
- 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
- 0x0098: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
- 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
- 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
- 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
- 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
- 0x009d: 0x011a, # LATIN CAPITAL LETTER E WITH CARON
- 0x009e: 0x011b, # LATIN SMALL LETTER E WITH CARON
- 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
- 0x00a0: 0x2020, # DAGGER
- 0x00a1: 0x00b0, # DEGREE SIGN
- 0x00a2: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
- 0x00a4: 0x00a7, # SECTION SIGN
- 0x00a5: 0x2022, # BULLET
- 0x00a6: 0x00b6, # PILCROW SIGN
- 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S
- 0x00a8: 0x00ae, # REGISTERED SIGN
- 0x00aa: 0x2122, # TRADE MARK SIGN
- 0x00ab: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
- 0x00ac: 0x00a8, # DIAERESIS
- 0x00ad: 0x2260, # NOT EQUAL TO
- 0x00ae: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
- 0x00af: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
- 0x00b0: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
- 0x00b1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
- 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO
- 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO
- 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON
- 0x00b5: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
- 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL
- 0x00b7: 0x2211, # N-ARY SUMMATION
- 0x00b8: 0x0142, # LATIN SMALL LETTER L WITH STROKE
- 0x00b9: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
- 0x00ba: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
- 0x00bb: 0x013d, # LATIN CAPITAL LETTER L WITH CARON
- 0x00bc: 0x013e, # LATIN SMALL LETTER L WITH CARON
- 0x00bd: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE
- 0x00be: 0x013a, # LATIN SMALL LETTER L WITH ACUTE
- 0x00bf: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
- 0x00c0: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
- 0x00c1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
- 0x00c2: 0x00ac, # NOT SIGN
- 0x00c3: 0x221a, # SQUARE ROOT
- 0x00c4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
- 0x00c5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON
- 0x00c6: 0x2206, # INCREMENT
- 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
- 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
- 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS
- 0x00ca: 0x00a0, # NO-BREAK SPACE
- 0x00cb: 0x0148, # LATIN SMALL LETTER N WITH CARON
- 0x00cc: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
- 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
- 0x00ce: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
- 0x00cf: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
- 0x00d0: 0x2013, # EN DASH
- 0x00d1: 0x2014, # EM DASH
- 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK
- 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK
- 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK
- 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK
- 0x00d6: 0x00f7, # DIVISION SIGN
- 0x00d7: 0x25ca, # LOZENGE
- 0x00d8: 0x014d, # LATIN SMALL LETTER O WITH MACRON
- 0x00d9: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE
- 0x00da: 0x0155, # LATIN SMALL LETTER R WITH ACUTE
- 0x00db: 0x0158, # LATIN CAPITAL LETTER R WITH CARON
- 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
- 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
- 0x00de: 0x0159, # LATIN SMALL LETTER R WITH CARON
- 0x00df: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
- 0x00e0: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
- 0x00e1: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
- 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK
- 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
- 0x00e4: 0x0161, # LATIN SMALL LETTER S WITH CARON
- 0x00e5: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
- 0x00e6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
- 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
- 0x00e8: 0x0164, # LATIN CAPITAL LETTER T WITH CARON
- 0x00e9: 0x0165, # LATIN SMALL LETTER T WITH CARON
- 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
- 0x00eb: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
- 0x00ec: 0x017e, # LATIN SMALL LETTER Z WITH CARON
- 0x00ed: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
- 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
- 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
- 0x00f0: 0x016b, # LATIN SMALL LETTER U WITH MACRON
- 0x00f1: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE
- 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
- 0x00f3: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
- 0x00f4: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
- 0x00f5: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
- 0x00f6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
- 0x00f7: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
- 0x00f8: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
- 0x00f9: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
- 0x00fa: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
- 0x00fb: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
- 0x00fc: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
- 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
- 0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
- 0x00ff: 0x02c7, # CARON
-})
-
-### Encoding Map
-
-encoding_map = codecs.make_encoding_map(decoding_map)
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\u0100' # 0x81 -> LATIN CAPITAL LETTER A WITH MACRON
+ '\u0101' # 0x82 -> LATIN SMALL LETTER A WITH MACRON
+ '\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\u0104' # 0x84 -> LATIN CAPITAL LETTER A WITH OGONEK
+ '\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE
+ '\u0105' # 0x88 -> LATIN SMALL LETTER A WITH OGONEK
+ '\u010c' # 0x89 -> LATIN CAPITAL LETTER C WITH CARON
+ '\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\u010d' # 0x8B -> LATIN SMALL LETTER C WITH CARON
+ '\u0106' # 0x8C -> LATIN CAPITAL LETTER C WITH ACUTE
+ '\u0107' # 0x8D -> LATIN SMALL LETTER C WITH ACUTE
+ '\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE
+ '\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE
+ '\u017a' # 0x90 -> LATIN SMALL LETTER Z WITH ACUTE
+ '\u010e' # 0x91 -> LATIN CAPITAL LETTER D WITH CARON
+ '\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE
+ '\u010f' # 0x93 -> LATIN SMALL LETTER D WITH CARON
+ '\u0112' # 0x94 -> LATIN CAPITAL LETTER E WITH MACRON
+ '\u0113' # 0x95 -> LATIN SMALL LETTER E WITH MACRON
+ '\u0116' # 0x96 -> LATIN CAPITAL LETTER E WITH DOT ABOVE
+ '\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE
+ '\u0117' # 0x98 -> LATIN SMALL LETTER E WITH DOT ABOVE
+ '\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE
+ '\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE
+ '\u011a' # 0x9D -> LATIN CAPITAL LETTER E WITH CARON
+ '\u011b' # 0x9E -> LATIN SMALL LETTER E WITH CARON
+ '\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\u2020' # 0xA0 -> DAGGER
+ '\xb0' # 0xA1 -> DEGREE SIGN
+ '\u0118' # 0xA2 -> LATIN CAPITAL LETTER E WITH OGONEK
+ '\xa3' # 0xA3 -> POUND SIGN
+ '\xa7' # 0xA4 -> SECTION SIGN
+ '\u2022' # 0xA5 -> BULLET
+ '\xb6' # 0xA6 -> PILCROW SIGN
+ '\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S
+ '\xae' # 0xA8 -> REGISTERED SIGN
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\u2122' # 0xAA -> TRADE MARK SIGN
+ '\u0119' # 0xAB -> LATIN SMALL LETTER E WITH OGONEK
+ '\xa8' # 0xAC -> DIAERESIS
+ '\u2260' # 0xAD -> NOT EQUAL TO
+ '\u0123' # 0xAE -> LATIN SMALL LETTER G WITH CEDILLA
+ '\u012e' # 0xAF -> LATIN CAPITAL LETTER I WITH OGONEK
+ '\u012f' # 0xB0 -> LATIN SMALL LETTER I WITH OGONEK
+ '\u012a' # 0xB1 -> LATIN CAPITAL LETTER I WITH MACRON
+ '\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO
+ '\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO
+ '\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON
+ '\u0136' # 0xB5 -> LATIN CAPITAL LETTER K WITH CEDILLA
+ '\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL
+ '\u2211' # 0xB7 -> N-ARY SUMMATION
+ '\u0142' # 0xB8 -> LATIN SMALL LETTER L WITH STROKE
+ '\u013b' # 0xB9 -> LATIN CAPITAL LETTER L WITH CEDILLA
+ '\u013c' # 0xBA -> LATIN SMALL LETTER L WITH CEDILLA
+ '\u013d' # 0xBB -> LATIN CAPITAL LETTER L WITH CARON
+ '\u013e' # 0xBC -> LATIN SMALL LETTER L WITH CARON
+ '\u0139' # 0xBD -> LATIN CAPITAL LETTER L WITH ACUTE
+ '\u013a' # 0xBE -> LATIN SMALL LETTER L WITH ACUTE
+ '\u0145' # 0xBF -> LATIN CAPITAL LETTER N WITH CEDILLA
+ '\u0146' # 0xC0 -> LATIN SMALL LETTER N WITH CEDILLA
+ '\u0143' # 0xC1 -> LATIN CAPITAL LETTER N WITH ACUTE
+ '\xac' # 0xC2 -> NOT SIGN
+ '\u221a' # 0xC3 -> SQUARE ROOT
+ '\u0144' # 0xC4 -> LATIN SMALL LETTER N WITH ACUTE
+ '\u0147' # 0xC5 -> LATIN CAPITAL LETTER N WITH CARON
+ '\u2206' # 0xC6 -> INCREMENT
+ '\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS
+ '\xa0' # 0xCA -> NO-BREAK SPACE
+ '\u0148' # 0xCB -> LATIN SMALL LETTER N WITH CARON
+ '\u0150' # 0xCC -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+ '\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE
+ '\u0151' # 0xCE -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ '\u014c' # 0xCF -> LATIN CAPITAL LETTER O WITH MACRON
+ '\u2013' # 0xD0 -> EN DASH
+ '\u2014' # 0xD1 -> EM DASH
+ '\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK
+ '\xf7' # 0xD6 -> DIVISION SIGN
+ '\u25ca' # 0xD7 -> LOZENGE
+ '\u014d' # 0xD8 -> LATIN SMALL LETTER O WITH MACRON
+ '\u0154' # 0xD9 -> LATIN CAPITAL LETTER R WITH ACUTE
+ '\u0155' # 0xDA -> LATIN SMALL LETTER R WITH ACUTE
+ '\u0158' # 0xDB -> LATIN CAPITAL LETTER R WITH CARON
+ '\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ '\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ '\u0159' # 0xDE -> LATIN SMALL LETTER R WITH CARON
+ '\u0156' # 0xDF -> LATIN CAPITAL LETTER R WITH CEDILLA
+ '\u0157' # 0xE0 -> LATIN SMALL LETTER R WITH CEDILLA
+ '\u0160' # 0xE1 -> LATIN CAPITAL LETTER S WITH CARON
+ '\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK
+ '\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u0161' # 0xE4 -> LATIN SMALL LETTER S WITH CARON
+ '\u015a' # 0xE5 -> LATIN CAPITAL LETTER S WITH ACUTE
+ '\u015b' # 0xE6 -> LATIN SMALL LETTER S WITH ACUTE
+ '\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\u0164' # 0xE8 -> LATIN CAPITAL LETTER T WITH CARON
+ '\u0165' # 0xE9 -> LATIN SMALL LETTER T WITH CARON
+ '\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\u017d' # 0xEB -> LATIN CAPITAL LETTER Z WITH CARON
+ '\u017e' # 0xEC -> LATIN SMALL LETTER Z WITH CARON
+ '\u016a' # 0xED -> LATIN CAPITAL LETTER U WITH MACRON
+ '\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\u016b' # 0xF0 -> LATIN SMALL LETTER U WITH MACRON
+ '\u016e' # 0xF1 -> LATIN CAPITAL LETTER U WITH RING ABOVE
+ '\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\u016f' # 0xF3 -> LATIN SMALL LETTER U WITH RING ABOVE
+ '\u0170' # 0xF4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+ '\u0171' # 0xF5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
+ '\u0172' # 0xF6 -> LATIN CAPITAL LETTER U WITH OGONEK
+ '\u0173' # 0xF7 -> LATIN SMALL LETTER U WITH OGONEK
+ '\xdd' # 0xF8 -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xfd' # 0xF9 -> LATIN SMALL LETTER Y WITH ACUTE
+ '\u0137' # 0xFA -> LATIN SMALL LETTER K WITH CEDILLA
+ '\u017b' # 0xFB -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ '\u0141' # 0xFC -> LATIN CAPITAL LETTER L WITH STROKE
+ '\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE
+ '\u0122' # 0xFE -> LATIN CAPITAL LETTER G WITH CEDILLA
+ '\u02c7' # 0xFF -> CARON
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/Lib/encodings/palmos.py b/Lib/encodings/palmos.py
index 4b77e2b..c506d65 100644
--- a/Lib/encodings/palmos.py
+++ b/Lib/encodings/palmos.py
@@ -10,18 +10,18 @@ import codecs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
- return codecs.charmap_encode(input,errors,encoding_map)
+ return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
- return codecs.charmap_decode(input,errors,decoding_map)
+ return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
- return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
- return codecs.charmap_decode(input,self.errors,decoding_map)[0]
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
@@ -42,42 +42,267 @@ def getregentry():
streamwriter=StreamWriter,
)
-### Decoding Map
-
-decoding_map = codecs.make_identity_dict(range(256))
-
-# The PalmOS character set is mostly iso-8859-1 with some differences.
-decoding_map.update({
- 0x0080: 0x20ac, # EURO SIGN
- 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
- 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK
- 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
- 0x0085: 0x2026, # HORIZONTAL ELLIPSIS
- 0x0086: 0x2020, # DAGGER
- 0x0087: 0x2021, # DOUBLE DAGGER
- 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT
- 0x0089: 0x2030, # PER MILLE SIGN
- 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
- 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
- 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE
- 0x008d: 0x2666, # BLACK DIAMOND SUIT
- 0x008e: 0x2663, # BLACK CLUB SUIT
- 0x008f: 0x2665, # BLACK HEART SUIT
- 0x0090: 0x2660, # BLACK SPADE SUIT
- 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
- 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
- 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
- 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
- 0x0095: 0x2022, # BULLET
- 0x0096: 0x2013, # EN DASH
- 0x0097: 0x2014, # EM DASH
- 0x0098: 0x02dc, # SMALL TILDE
- 0x0099: 0x2122, # TRADE MARK SIGN
- 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON
- 0x009c: 0x0153, # LATIN SMALL LIGATURE OE
- 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
-})
-
-### Encoding Map
-
-encoding_map = codecs.make_encoding_map(decoding_map)
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u20ac' # 0x80 -> EURO SIGN
+ '\x81' # 0x81 -> <control>
+ '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ '\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\u2020' # 0x86 -> DAGGER
+ '\u2021' # 0x87 -> DOUBLE DAGGER
+ '\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ '\u2030' # 0x89 -> PER MILLE SIGN
+ '\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON
+ '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ '\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
+ '\u2666' # 0x8D -> BLACK DIAMOND SUIT
+ '\u2663' # 0x8E -> BLACK CLUB SUIT
+ '\u2665' # 0x8F -> BLACK HEART SUIT
+ '\u2660' # 0x90 -> BLACK SPADE SUIT
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\u02dc' # 0x98 -> SMALL TILDE
+ '\u2122' # 0x99 -> TRADE MARK SIGN
+ '\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON
+ '\x9b' # 0x9B -> <control>
+ '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
+ '\x9d' # 0x9D -> <control>
+ '\x9e' # 0x9E -> <control>
+ '\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
+ '\xa2' # 0xA2 -> CENT SIGN
+ '\xa3' # 0xA3 -> POUND SIGN
+ '\xa4' # 0xA4 -> CURRENCY SIGN
+ '\xa5' # 0xA5 -> YEN SIGN
+ '\xa6' # 0xA6 -> BROKEN BAR
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\xa8' # 0xA8 -> DIAERESIS
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\xaf' # 0xAF -> MACRON
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ '\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ '\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ '\xb4' # 0xB4 -> ACUTE ACCENT
+ '\xb5' # 0xB5 -> MICRO SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\xb8' # 0xB8 -> CEDILLA
+ '\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ '\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ '\xbf' # 0xBF -> INVERTED QUESTION MARK
+ '\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
+ '\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+ '\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic)
+ '\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ '\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+ '\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic)
+ '\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German)
+ '\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
+ '\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe6' # 0xE6 -> LATIN SMALL LETTER AE
+ '\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
+ '\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
+ '\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic)
+ '\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
+ '\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+ '\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
+ '\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf7' # 0xF7 -> DIVISION SIGN
+ '\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
+ '\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+ '\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
+ '\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic)
+ '\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/Lib/encodings/ptcp154.py b/Lib/encodings/ptcp154.py
index aef8975..656b79d 100644
--- a/Lib/encodings/ptcp154.py
+++ b/Lib/encodings/ptcp154.py
@@ -14,18 +14,18 @@ import codecs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
- return codecs.charmap_encode(input,errors,encoding_map)
+ return codecs.charmap_encode(input,errors,encoding_table)
def decode(self,input,errors='strict'):
- return codecs.charmap_decode(input,errors,decoding_map)
+ return codecs.charmap_decode(input,errors,decoding_table)
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
- return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):
- return codecs.charmap_decode(input,self.errors,decoding_map)[0]
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
class StreamWriter(Codec,codecs.StreamWriter):
pass
@@ -46,130 +46,267 @@ def getregentry():
streamwriter=StreamWriter,
)
-### Decoding Map
-
-decoding_map = codecs.make_identity_dict(range(256))
-decoding_map.update({
- 0x0080: 0x0496, # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
- 0x0081: 0x0492, # CYRILLIC CAPITAL LETTER GHE WITH STROKE
- 0x0082: 0x04ee, # CYRILLIC CAPITAL LETTER U WITH MACRON
- 0x0083: 0x0493, # CYRILLIC SMALL LETTER GHE WITH STROKE
- 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
- 0x0085: 0x2026, # HORIZONTAL ELLIPSIS
- 0x0086: 0x04b6, # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
- 0x0087: 0x04ae, # CYRILLIC CAPITAL LETTER STRAIGHT U
- 0x0088: 0x04b2, # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
- 0x0089: 0x04af, # CYRILLIC SMALL LETTER STRAIGHT U
- 0x008a: 0x04a0, # CYRILLIC CAPITAL LETTER BASHKIR KA
- 0x008b: 0x04e2, # CYRILLIC CAPITAL LETTER I WITH MACRON
- 0x008c: 0x04a2, # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
- 0x008d: 0x049a, # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
- 0x008e: 0x04ba, # CYRILLIC CAPITAL LETTER SHHA
- 0x008f: 0x04b8, # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
- 0x0090: 0x0497, # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
- 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK
- 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK
- 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK
- 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK
- 0x0095: 0x2022, # BULLET
- 0x0096: 0x2013, # EN DASH
- 0x0097: 0x2014, # EM DASH
- 0x0098: 0x04b3, # CYRILLIC SMALL LETTER HA WITH DESCENDER
- 0x0099: 0x04b7, # CYRILLIC SMALL LETTER CHE WITH DESCENDER
- 0x009a: 0x04a1, # CYRILLIC SMALL LETTER BASHKIR KA
- 0x009b: 0x04e3, # CYRILLIC SMALL LETTER I WITH MACRON
- 0x009c: 0x04a3, # CYRILLIC SMALL LETTER EN WITH DESCENDER
- 0x009d: 0x049b, # CYRILLIC SMALL LETTER KA WITH DESCENDER
- 0x009e: 0x04bb, # CYRILLIC SMALL LETTER SHHA
- 0x009f: 0x04b9, # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
- 0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U (Byelorussian)
- 0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U (Byelorussian)
- 0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE
- 0x00a4: 0x04e8, # CYRILLIC CAPITAL LETTER BARRED O
- 0x00a5: 0x0498, # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
- 0x00a6: 0x04b0, # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
- 0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO
- 0x00aa: 0x04d8, # CYRILLIC CAPITAL LETTER SCHWA
- 0x00ad: 0x04ef, # CYRILLIC SMALL LETTER U WITH MACRON
- 0x00af: 0x049c, # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
- 0x00b1: 0x04b1, # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
- 0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
- 0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
- 0x00b4: 0x0499, # CYRILLIC SMALL LETTER ZE WITH DESCENDER
- 0x00b5: 0x04e9, # CYRILLIC SMALL LETTER BARRED O
- 0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO
- 0x00b9: 0x2116, # NUMERO SIGN
- 0x00ba: 0x04d9, # CYRILLIC SMALL LETTER SCHWA
- 0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE
- 0x00bd: 0x04aa, # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
- 0x00be: 0x04ab, # CYRILLIC SMALL LETTER ES WITH DESCENDER
- 0x00bf: 0x049d, # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
- 0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A
- 0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE
- 0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE
- 0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE
- 0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE
- 0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE
- 0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
- 0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE
- 0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I
- 0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
- 0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA
- 0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL
- 0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM
- 0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN
- 0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O
- 0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE
- 0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER
- 0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES
- 0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE
- 0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U
- 0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF
- 0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA
- 0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE
- 0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE
- 0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA
- 0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
- 0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
- 0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU
- 0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
- 0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E
- 0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU
- 0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA
- 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A
- 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE
- 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE
- 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE
- 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE
- 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE
- 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE
- 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE
- 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I
- 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
- 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA
- 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL
- 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM
- 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN
- 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O
- 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE
- 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER
- 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES
- 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE
- 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U
- 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF
- 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA
- 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE
- 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE
- 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA
- 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
- 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
- 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU
- 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
- 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
- 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
- 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA
-})
-
-### Encoding Map
-
-encoding_map = codecs.make_encoding_map(decoding_map)
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE (DEL)
+ '\u0496' # 0x80 -> CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+ '\u0492' # 0x81 -> CYRILLIC CAPITAL LETTER GHE WITH STROKE
+ '\u04ee' # 0x82 -> CYRILLIC CAPITAL LETTER U WITH MACRON
+ '\u0493' # 0x83 -> CYRILLIC SMALL LETTER GHE WITH STROKE
+ '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\u04b6' # 0x86 -> CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+ '\u04ae' # 0x87 -> CYRILLIC CAPITAL LETTER STRAIGHT U
+ '\u04b2' # 0x88 -> CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+ '\u04af' # 0x89 -> CYRILLIC SMALL LETTER STRAIGHT U
+ '\u04a0' # 0x8A -> CYRILLIC CAPITAL LETTER BASHKIR KA
+ '\u04e2' # 0x8B -> CYRILLIC CAPITAL LETTER I WITH MACRON
+ '\u04a2' # 0x8C -> CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+ '\u049a' # 0x8D -> CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+ '\u04ba' # 0x8E -> CYRILLIC CAPITAL LETTER SHHA
+ '\u04b8' # 0x8F -> CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+ '\u0497' # 0x90 -> CYRILLIC SMALL LETTER ZHE WITH DESCENDER
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\u04b3' # 0x98 -> CYRILLIC SMALL LETTER HA WITH DESCENDER
+ '\u04b7' # 0x99 -> CYRILLIC SMALL LETTER CHE WITH DESCENDER
+ '\u04a1' # 0x9A -> CYRILLIC SMALL LETTER BASHKIR KA
+ '\u04e3' # 0x9B -> CYRILLIC SMALL LETTER I WITH MACRON
+ '\u04a3' # 0x9C -> CYRILLIC SMALL LETTER EN WITH DESCENDER
+ '\u049b' # 0x9D -> CYRILLIC SMALL LETTER KA WITH DESCENDER
+ '\u04bb' # 0x9E -> CYRILLIC SMALL LETTER SHHA
+ '\u04b9' # 0x9F -> CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\u040e' # 0xA1 -> CYRILLIC CAPITAL LETTER SHORT U (Byelorussian)
+ '\u045e' # 0xA2 -> CYRILLIC SMALL LETTER SHORT U (Byelorussian)
+ '\u0408' # 0xA3 -> CYRILLIC CAPITAL LETTER JE
+ '\u04e8' # 0xA4 -> CYRILLIC CAPITAL LETTER BARRED O
+ '\u0498' # 0xA5 -> CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+ '\u04b0' # 0xA6 -> CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\u04d8' # 0xAA -> CYRILLIC CAPITAL LETTER SCHWA
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\u04ef' # 0xAD -> CYRILLIC SMALL LETTER U WITH MACRON
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\u049c' # 0xAF -> CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\u04b1' # 0xB1 -> CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
+ '\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ '\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ '\u0499' # 0xB4 -> CYRILLIC SMALL LETTER ZE WITH DESCENDER
+ '\u04e9' # 0xB5 -> CYRILLIC SMALL LETTER BARRED O
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO
+ '\u2116' # 0xB9 -> NUMERO SIGN
+ '\u04d9' # 0xBA -> CYRILLIC SMALL LETTER SCHWA
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u0458' # 0xBC -> CYRILLIC SMALL LETTER JE
+ '\u04aa' # 0xBD -> CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+ '\u04ab' # 0xBE -> CYRILLIC SMALL LETTER ES WITH DESCENDER
+ '\u049d' # 0xBF -> CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
+ '\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A
+ '\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE
+ '\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE
+ '\u0413' # 0xC3 -> CYRILLIC CAPITAL LETTER GHE
+ '\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE
+ '\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE
+ '\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE
+ '\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE
+ '\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I
+ '\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I
+ '\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA
+ '\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL
+ '\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM
+ '\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN
+ '\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O
+ '\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE
+ '\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER
+ '\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES
+ '\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE
+ '\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U
+ '\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF
+ '\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA
+ '\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE
+ '\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE
+ '\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA
+ '\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA
+ '\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN
+ '\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU
+ '\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN
+ '\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E
+ '\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU
+ '\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA
+ '\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A
+ '\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE
+ '\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE
+ '\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE
+ '\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE
+ '\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE
+ '\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE
+ '\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE
+ '\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I
+ '\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I
+ '\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA
+ '\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL
+ '\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM
+ '\u043d' # 0xED -> CYRILLIC SMALL LETTER EN
+ '\u043e' # 0xEE -> CYRILLIC SMALL LETTER O
+ '\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE
+ '\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER
+ '\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES
+ '\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE
+ '\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U
+ '\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF
+ '\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA
+ '\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE
+ '\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE
+ '\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA
+ '\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA
+ '\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN
+ '\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU
+ '\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN
+ '\u044d' # 0xFD -> CYRILLIC SMALL LETTER E
+ '\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU
+ '\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/Lib/fileinput.py b/Lib/fileinput.py
index 554beb2..dbbbb21 100644
--- a/Lib/fileinput.py
+++ b/Lib/fileinput.py
@@ -398,9 +398,8 @@ def hook_compressed(filename, mode):
def hook_encoded(encoding):
- import codecs
def openhook(filename, mode):
- return codecs.open(filename, mode, encoding)
+ return open(filename, mode, encoding=encoding)
return openhook
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 726fbe5..f446769 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -35,9 +35,9 @@ def fnmatch(name, pat):
pat = os.path.normcase(pat)
return fnmatchcase(name, pat)
-@functools.lru_cache(maxsize=250)
-def _compile_pattern(pat, is_bytes=False):
- if is_bytes:
+@functools.lru_cache(maxsize=250, typed=True)
+def _compile_pattern(pat):
+ if isinstance(pat, bytes):
pat_str = str(pat, 'ISO-8859-1')
res_str = translate(pat_str)
res = bytes(res_str, 'ISO-8859-1')
@@ -49,7 +49,7 @@ def filter(names, pat):
"""Return the subset of the list NAMES that match PAT."""
result = []
pat = os.path.normcase(pat)
- match = _compile_pattern(pat, isinstance(pat, bytes))
+ match = _compile_pattern(pat)
if os.path is posixpath:
# normcase on posix is NOP. Optimize it away from the loop.
for name in names:
@@ -67,7 +67,7 @@ def fnmatchcase(name, pat):
This is a version of fnmatch() which doesn't case-normalize
its arguments.
"""
- match = _compile_pattern(pat, isinstance(pat, bytes))
+ match = _compile_pattern(pat)
return match(name) is not None
diff --git a/Lib/ftplib.py b/Lib/ftplib.py
index 8e53023..18887a6 100644
--- a/Lib/ftplib.py
+++ b/Lib/ftplib.py
@@ -100,14 +100,15 @@ class FTP:
file = None
welcome = None
passiveserver = 1
- encoding = "latin1"
+ encoding = "latin-1"
# Initialization method (called by class instantiation).
# Initialize host to localhost, port to standard ftp port
# Optional arguments are host (for connect()),
# and user, passwd, acct (for login())
def __init__(self, host='', user='', passwd='', acct='',
- timeout=_GLOBAL_DEFAULT_TIMEOUT):
+ timeout=_GLOBAL_DEFAULT_TIMEOUT, source_address=None):
+ self.source_address = source_address
self.timeout = timeout
if host:
self.connect(host)
@@ -128,10 +129,12 @@ class FTP:
if self.sock is not None:
self.close()
- def connect(self, host='', port=0, timeout=-999):
+ def connect(self, host='', port=0, timeout=-999, source_address=None):
'''Connect to host. Arguments are:
- host: hostname to connect to (string, default previous host)
- port: port to connect to (integer, default previous port)
+ - source_address: a 2-tuple (host, port) for the socket to bind
+ to as its source address before connecting.
'''
if host != '':
self.host = host
@@ -139,7 +142,10 @@ class FTP:
self.port = port
if timeout != -999:
self.timeout = timeout
- self.sock = socket.create_connection((self.host, self.port), self.timeout)
+ if source_address is not None:
+ self.source_address = source_address
+ self.sock = socket.create_connection((self.host, self.port), self.timeout,
+ source_address=self.source_address)
self.af = self.sock.family
self.file = self.sock.makefile('r', encoding=self.encoding)
self.welcome = self.getresp()
@@ -169,10 +175,8 @@ class FTP:
# Internal: "sanitize" a string for printing
def sanitize(self, s):
- if s[:5] == 'pass ' or s[:5] == 'PASS ':
- i = len(s)
- while i > 5 and s[i-1] in {'\r', '\n'}:
- i = i-1
+ if s[:5] in {'pass ', 'PASS '}:
+ i = len(s.rstrip('\r\n'))
s = s[:5] + '*'*(i-5) + s[i:]
return repr(s)
@@ -335,7 +339,8 @@ class FTP:
size = None
if self.passiveserver:
host, port = self.makepasv()
- conn = socket.create_connection((host, port), self.timeout)
+ conn = socket.create_connection((host, port), self.timeout,
+ source_address=self.source_address)
try:
if rest is not None:
self.sendcmd("REST %s" % rest)
@@ -354,8 +359,7 @@ class FTP:
conn.close()
raise
else:
- sock = self.makeport()
- try:
+ with self.makeport() as sock:
if rest is not None:
self.sendcmd("REST %s" % rest)
resp = self.sendcmd(cmd)
@@ -367,8 +371,6 @@ class FTP:
conn, sockaddr = sock.accept()
if self.timeout is not _GLOBAL_DEFAULT_TIMEOUT:
conn.settimeout(self.timeout)
- finally:
- sock.close()
if resp[:3] == '150':
# this is conditional in case we received a 125
size = parse150(resp)
@@ -426,7 +428,7 @@ class FTP:
"""Retrieve data in line mode. A new port is created for you.
Args:
- cmd: A RETR, LIST, NLST, or MLSD command.
+ cmd: A RETR, LIST, or NLST command.
callback: An optional single parameter callable that is called
for each line with the trailing CRLF stripped.
[default: print_line()]
@@ -527,6 +529,34 @@ class FTP:
cmd = cmd + (' ' + arg)
self.retrlines(cmd, func)
+ def mlsd(self, path="", facts=[]):
+ '''List a directory in a standardized format by using MLSD
+ command (RFC-3659). If path is omitted the current directory
+ is assumed. "facts" is a list of strings representing the type
+ of information desired (e.g. ["type", "size", "perm"]).
+
+ Return a generator object yielding a tuple of two elements
+ for every file found in path.
+ First element is the file name, the second one is a dictionary
+ including a variable number of "facts" depending on the server
+ and whether "facts" argument has been provided.
+ '''
+ if facts:
+ self.sendcmd("OPTS MLST " + ";".join(facts) + ";")
+ if path:
+ cmd = "MLSD %s" % path
+ else:
+ cmd = "MLSD"
+ lines = []
+ self.retrlines(cmd, lines.append)
+ for line in lines:
+ facts_found, _, name = line.rstrip(CRLF).partition(' ')
+ entry = {}
+ for fact in facts_found[:-1].split(";"):
+ key, _, value = fact.partition("=")
+ entry[key.lower()] = value
+ yield (name, entry)
+
def rename(self, fromname, toname):
'''Rename a file.'''
resp = self.sendcmd('RNFR ' + fromname)
@@ -561,10 +591,7 @@ class FTP:
resp = self.sendcmd('SIZE ' + filename)
if resp[:3] == '213':
s = resp[3:].strip()
- try:
- return int(s)
- except (OverflowError, ValueError):
- return int(s)
+ return int(s)
def mkd(self, dirname):
'''Make a directory, return its full pathname.'''
@@ -596,11 +623,11 @@ class FTP:
def close(self):
'''Close the connection without assuming anything about it.'''
- if self.file:
+ if self.file is not None:
self.file.close()
+ if self.sock is not None:
self.sock.close()
- self.file = self.sock = None
-
+ self.file = self.sock = None
try:
import ssl
@@ -644,7 +671,7 @@ else:
def __init__(self, host='', user='', passwd='', acct='', keyfile=None,
certfile=None, context=None,
- timeout=_GLOBAL_DEFAULT_TIMEOUT):
+ timeout=_GLOBAL_DEFAULT_TIMEOUT, source_address=None):
if context is not None and keyfile is not None:
raise ValueError("context and keyfile arguments are mutually "
"exclusive")
@@ -655,7 +682,7 @@ else:
self.certfile = certfile
self.context = context
self._prot_p = False
- FTP.__init__(self, host, user, passwd, acct, timeout)
+ FTP.__init__(self, host, user, passwd, acct, timeout, source_address)
def login(self, user='', passwd='', acct='', secure=True):
if secure and not isinstance(self.sock, ssl.SSLSocket):
@@ -679,6 +706,14 @@ else:
self.file = self.sock.makefile(mode='r', encoding=self.encoding)
return resp
+ def ccc(self):
+ '''Switch back to a clear-text control connection.'''
+ if not isinstance(self.sock, ssl.SSLSocket):
+ raise ValueError("not using TLS")
+ resp = self.voidcmd('CCC')
+ self.sock = self.sock.unwrap()
+ return resp
+
def prot_p(self):
'''Set up secure data connection.'''
# PROT defines whether or not the data channel is to be protected.
@@ -715,8 +750,7 @@ else:
def retrbinary(self, cmd, callback, blocksize=8192, rest=None):
self.voidcmd('TYPE I')
- conn = self.transfercmd(cmd, rest)
- try:
+ with self.transfercmd(cmd, rest) as conn:
while 1:
data = conn.recv(blocksize)
if not data:
@@ -725,8 +759,6 @@ else:
# shutdown ssl layer
if isinstance(conn, ssl.SSLSocket):
conn.unwrap()
- finally:
- conn.close()
return self.voidresp()
def retrlines(self, cmd, callback = None):
@@ -734,7 +766,7 @@ else:
resp = self.sendcmd('TYPE A')
conn = self.transfercmd(cmd)
fp = conn.makefile('r', encoding=self.encoding)
- try:
+ with fp, conn:
while 1:
line = fp.readline()
if self.debugging > 2: print('*retr*', repr(line))
@@ -748,15 +780,11 @@ else:
# shutdown ssl layer
if isinstance(conn, ssl.SSLSocket):
conn.unwrap()
- finally:
- fp.close()
- conn.close()
return self.voidresp()
def storbinary(self, cmd, fp, blocksize=8192, callback=None, rest=None):
self.voidcmd('TYPE I')
- conn = self.transfercmd(cmd, rest)
- try:
+ with self.transfercmd(cmd, rest) as conn:
while 1:
buf = fp.read(blocksize)
if not buf: break
@@ -765,14 +793,11 @@ else:
# shutdown ssl layer
if isinstance(conn, ssl.SSLSocket):
conn.unwrap()
- finally:
- conn.close()
return self.voidresp()
def storlines(self, cmd, fp, callback=None):
self.voidcmd('TYPE A')
- conn = self.transfercmd(cmd)
- try:
+ with self.transfercmd(cmd) as conn:
while 1:
buf = fp.readline()
if not buf: break
@@ -784,8 +809,6 @@ else:
# shutdown ssl layer
if isinstance(conn, ssl.SSLSocket):
conn.unwrap()
- finally:
- conn.close()
return self.voidresp()
def abort(self):
@@ -818,11 +841,7 @@ def parse150(resp):
m = _150_re.match(resp)
if not m:
return None
- s = m.group(1)
- try:
- return int(s)
- except (OverflowError, ValueError):
- return int(s)
+ return int(m.group(1))
_227_re = None
diff --git a/Lib/functools.py b/Lib/functools.py
index 85ea257..226a46e 100644
--- a/Lib/functools.py
+++ b/Lib/functools.py
@@ -12,16 +12,22 @@ __all__ = ['update_wrapper', 'wraps', 'WRAPPER_ASSIGNMENTS', 'WRAPPER_UPDATES',
'total_ordering', 'cmp_to_key', 'lru_cache', 'reduce', 'partial']
from _functools import partial, reduce
-from collections import OrderedDict, namedtuple
+from collections import namedtuple
try:
from _thread import allocate_lock as Lock
except:
from _dummy_thread import allocate_lock as Lock
+
+################################################################################
+### update_wrapper() and wraps() decorator
+################################################################################
+
# update_wrapper() and wraps() are tools to help write
# wrapper functions that can handle naive introspection
-WRAPPER_ASSIGNMENTS = ('__module__', '__name__', '__doc__', '__annotations__')
+WRAPPER_ASSIGNMENTS = ('__module__', '__name__', '__qualname__', '__doc__',
+ '__annotations__')
WRAPPER_UPDATES = ('__dict__',)
def update_wrapper(wrapper,
wrapped,
@@ -65,6 +71,11 @@ def wraps(wrapped,
return partial(update_wrapper, wrapped=wrapped,
assigned=assigned, updated=updated)
+
+################################################################################
+### total_ordering class decorator
+################################################################################
+
def total_ordering(cls):
"""Class decorator that fills in missing ordering methods"""
convert = {
@@ -93,6 +104,11 @@ def total_ordering(cls):
setattr(cls, opname, opfunc)
return cls
+
+################################################################################
+### cmp_to_key() function converter
+################################################################################
+
def cmp_to_key(mycmp):
"""Convert a cmp= function into a key= function"""
class K(object):
@@ -114,95 +130,174 @@ def cmp_to_key(mycmp):
__hash__ = None
return K
-_CacheInfo = namedtuple("CacheInfo", "hits misses maxsize currsize")
+try:
+ from _functools import cmp_to_key
+except ImportError:
+ pass
+
+
+################################################################################
+### LRU Cache function decorator
+################################################################################
+
+_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
+
+class _HashedSeq(list):
+ __slots__ = 'hashvalue'
+
+ def __init__(self, tup, hash=hash):
+ self[:] = tup
+ self.hashvalue = hash(tup)
+
+ def __hash__(self):
+ return self.hashvalue
-def lru_cache(maxsize=100):
+def _make_key(args, kwds, typed,
+ kwd_mark = (object(),),
+ fasttypes = {int, str, frozenset, type(None)},
+ sorted=sorted, tuple=tuple, type=type, len=len):
+ 'Make a cache key from optionally typed positional and keyword arguments'
+ key = args
+ if kwds:
+ sorted_items = sorted(kwds.items())
+ key += kwd_mark
+ for item in sorted_items:
+ key += item
+ if typed:
+ key += tuple(type(v) for v in args)
+ if kwds:
+ key += tuple(type(v) for k, v in sorted_items)
+ elif len(key) == 1 and type(key[0]) in fasttypes:
+ return key[0]
+ return _HashedSeq(key)
+
+def lru_cache(maxsize=128, typed=False):
"""Least-recently-used cache decorator.
If *maxsize* is set to None, the LRU features are disabled and the cache
can grow without bound.
+ If *typed* is True, arguments of different types will be cached separately.
+ For example, f(3.0) and f(3) will be treated as distinct calls with
+ distinct results.
+
Arguments to the cached function must be hashable.
- View the cache statistics named tuple (hits, misses, maxsize, currsize) with
- f.cache_info(). Clear the cache and statistics with f.cache_clear().
+ View the cache statistics named tuple (hits, misses, maxsize, currsize)
+ with f.cache_info(). Clear the cache and statistics with f.cache_clear().
Access the underlying function with f.__wrapped__.
See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
"""
+
# Users should only access the lru_cache through its public API:
# cache_info, cache_clear, and f.__wrapped__
# The internals of the lru_cache are encapsulated for thread safety and
# to allow the implementation to change (including a possible C version).
- def decorating_function(user_function,
- tuple=tuple, sorted=sorted, len=len, KeyError=KeyError):
+ # Constants shared by all lru cache instances:
+ sentinel = object() # unique object used to signal cache misses
+ make_key = _make_key # build a key from the function arguments
+ PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields
+
+ def decorating_function(user_function):
- hits = misses = 0
- kwd_mark = (object(),) # separates positional and keyword args
- lock = Lock() # needed because OrderedDict isn't threadsafe
+ cache = {}
+ hits = misses = currsize = 0
+ full = False
+ cache_get = cache.get # bound method to lookup a key or return None
+ lock = Lock() # because linkedlist updates aren't threadsafe
+ root = [] # root of the circular doubly linked list
+ root[:] = [root, root, None, None] # initialize by pointing to self
- if maxsize is None:
- cache = dict() # simple cache without ordering or size limit
+ if maxsize == 0:
- @wraps(user_function)
def wrapper(*args, **kwds):
- nonlocal hits, misses
- key = args
- if kwds:
- key += kwd_mark + tuple(sorted(kwds.items()))
- try:
- result = cache[key]
+ # no caching, just a statistics update after a successful call
+ nonlocal misses
+ result = user_function(*args, **kwds)
+ misses += 1
+ return result
+
+ elif maxsize is None:
+
+ def wrapper(*args, **kwds):
+ # simple caching without ordering or size limit
+ nonlocal hits, misses, currsize
+ key = make_key(args, kwds, typed)
+ result = cache_get(key, sentinel)
+ if result is not sentinel:
hits += 1
return result
- except KeyError:
- pass
result = user_function(*args, **kwds)
cache[key] = result
misses += 1
+ currsize += 1
return result
+
else:
- cache = OrderedDict() # ordered least recent to most recent
- cache_popitem = cache.popitem
- cache_renew = cache.move_to_end
- @wraps(user_function)
def wrapper(*args, **kwds):
- nonlocal hits, misses
- key = args
- if kwds:
- key += kwd_mark + tuple(sorted(kwds.items()))
+ # size limited caching that tracks accesses by recency
+ nonlocal root, hits, misses, currsize, full
+ key = make_key(args, kwds, typed)
with lock:
- try:
- result = cache[key]
- cache_renew(key) # record recent use of this key
+ link = cache_get(key)
+ if link is not None:
+ # move the link to the front of the circular queue
+ link_prev, link_next, key, result = link
+ link_prev[NEXT] = link_next
+ link_next[PREV] = link_prev
+ last = root[PREV]
+ last[NEXT] = root[PREV] = link
+ link[PREV] = last
+ link[NEXT] = root
hits += 1
return result
- except KeyError:
- pass
result = user_function(*args, **kwds)
with lock:
- cache[key] = result # record recent use of this key
+ if key in cache:
+ # getting here means that this same key was added to the
+ # cache while the lock was released. since the link
+ # update is already done, we need only return the
+ # computed result and update the count of misses.
+ pass
+ elif full:
+ # use root to store the new key and result
+ root[KEY] = key
+ root[RESULT] = result
+ cache[key] = root
+ # empty the oldest link and make it the new root
+ root = root[NEXT]
+ del cache[root[KEY]]
+ root[KEY] = root[RESULT] = None
+ else:
+ # put result in a new link at the front of the queue
+ last = root[PREV]
+ link = [last, root, key, result]
+ cache[key] = last[NEXT] = root[PREV] = link
+ currsize += 1
+ full = (currsize == maxsize)
misses += 1
- if len(cache) > maxsize:
- cache_popitem(0) # purge least recently used cache entry
return result
def cache_info():
"""Report cache statistics"""
with lock:
- return _CacheInfo(hits, misses, maxsize, len(cache))
+ return _CacheInfo(hits, misses, maxsize, currsize)
def cache_clear():
"""Clear the cache and cache statistics"""
- nonlocal hits, misses
+ nonlocal hits, misses, currsize, full
with lock:
cache.clear()
- hits = misses = 0
+ root[:] = [root, root, None, None]
+ hits = misses = currsize = 0
+ full = False
wrapper.cache_info = cache_info
wrapper.cache_clear = cache_clear
- return wrapper
+ return update_wrapper(wrapper, user_function)
return decorating_function
diff --git a/Lib/getopt.py b/Lib/getopt.py
index 980861d..3d6ecbd 100644
--- a/Lib/getopt.py
+++ b/Lib/getopt.py
@@ -19,7 +19,7 @@ option involved with the exception.
# Gerrit Holl <gerrit@nl.linux.org> moved the string-based exceptions
# to class-based exceptions.
#
-# Peter Åstrand <astrand@lysator.liu.se> added gnu_getopt().
+# Peter Ã…strand <astrand@lysator.liu.se> added gnu_getopt().
#
# TODO for gnu_getopt():
#
@@ -34,6 +34,11 @@ option involved with the exception.
__all__ = ["GetoptError","error","getopt","gnu_getopt"]
import os
+try:
+ from gettext import gettext as _
+except ImportError:
+ # Bootstrapping Python: gettext's dependencies not built yet
+ def _(s): return s
class GetoptError(Exception):
opt = ''
@@ -153,10 +158,10 @@ def do_longs(opts, opt, longopts, args):
if has_arg:
if optarg is None:
if not args:
- raise GetoptError('option --%s requires argument' % opt, opt)
+ raise GetoptError(_('option --%s requires argument') % opt, opt)
optarg, args = args[0], args[1:]
elif optarg is not None:
- raise GetoptError('option --%s must not have an argument' % opt, opt)
+ raise GetoptError(_('option --%s must not have an argument') % opt, opt)
opts.append(('--' + opt, optarg or ''))
return opts, args
@@ -166,7 +171,7 @@ def do_longs(opts, opt, longopts, args):
def long_has_args(opt, longopts):
possibilities = [o for o in longopts if o.startswith(opt)]
if not possibilities:
- raise GetoptError('option --%s not recognized' % opt, opt)
+ raise GetoptError(_('option --%s not recognized') % opt, opt)
# Is there an exact match?
if opt in possibilities:
return False, opt
@@ -176,7 +181,7 @@ def long_has_args(opt, longopts):
if len(possibilities) > 1:
# XXX since possibilities contains all valid continuations, might be
# nice to work them into the error msg
- raise GetoptError('option --%s not a unique prefix' % opt, opt)
+ raise GetoptError(_('option --%s not a unique prefix') % opt, opt)
assert len(possibilities) == 1
unique_match = possibilities[0]
has_arg = unique_match.endswith('=')
@@ -190,7 +195,7 @@ def do_shorts(opts, optstring, shortopts, args):
if short_has_arg(opt, shortopts):
if optstring == '':
if not args:
- raise GetoptError('option -%s requires argument' % opt,
+ raise GetoptError(_('option -%s requires argument') % opt,
opt)
optstring, args = args[0], args[1:]
optarg, optstring = optstring, ''
@@ -203,7 +208,7 @@ def short_has_arg(opt, shortopts):
for i in range(len(shortopts)):
if opt == shortopts[i] != ':':
return shortopts.startswith(':', i+1)
- raise GetoptError('option -%s not recognized' % opt, opt)
+ raise GetoptError(_('option -%s not recognized') % opt, opt)
if __name__ == '__main__':
import sys
diff --git a/Lib/getpass.py b/Lib/getpass.py
index dc02bd1..0044742 100644
--- a/Lib/getpass.py
+++ b/Lib/getpass.py
@@ -72,7 +72,7 @@ def unix_getpass(prompt='Password: ', stream=None):
finally:
termios.tcsetattr(fd, tcsetattr_flags, old)
stream.flush() # issue7208
- except termios.error as e:
+ except termios.error:
if passwd is not None:
# _raw_input succeeded. The final tcsetattr failed. Reraise
# instead of leaving the terminal in an unknown state.
@@ -145,8 +145,6 @@ def getuser():
"""
- import os
-
for name in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'):
user = os.environ.get(name)
if user:
diff --git a/Lib/gettext.py b/Lib/gettext.py
index 256e331..e43f044 100644
--- a/Lib/gettext.py
+++ b/Lib/gettext.py
@@ -55,7 +55,7 @@ __all__ = ['NullTranslations', 'GNUTranslations', 'Catalog',
'dgettext', 'dngettext', 'gettext', 'ngettext',
]
-_default_localedir = os.path.join(sys.prefix, 'share', 'locale')
+_default_localedir = os.path.join(sys.base_prefix, 'share', 'locale')
def c2py(plural):
diff --git a/Lib/gzip.py b/Lib/gzip.py
index e1b43a5..d8abffd 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -16,17 +16,49 @@ FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
READ, WRITE = 1, 2
-def U32(i):
- """Return i as an unsigned integer, assuming it fits in 32 bits.
- If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.
- """
- if i < 0:
- i += 1 << 32
- return i
+def open(filename, mode="rb", compresslevel=9,
+ encoding=None, errors=None, newline=None):
+ """Open a gzip-compressed file in binary or text mode.
+
+ The filename argument can be an actual filename (a str or bytes object), or
+ an existing file object to read from or write to.
+
+ The mode argument can be "r", "rb", "w", "wb", "a" or "ab" for binary mode,
+ or "rt", "wt" or "at" for text mode. The default mode is "rb", and the
+ default compresslevel is 9.
-def LOWU32(i):
- """Return the low-order 32 bits, as a non-negative int"""
- return i & 0xFFFFFFFF
+ For binary mode, this function is equivalent to the GzipFile constructor:
+ GzipFile(filename, mode, compresslevel). In this case, the encoding, errors
+ and newline arguments must not be provided.
+
+ For text mode, a GzipFile object is created, and wrapped in an
+ io.TextIOWrapper instance with the specified encoding, error handling
+ behavior, and line ending(s).
+
+ """
+ if "t" in mode:
+ if "b" in mode:
+ raise ValueError("Invalid mode: %r" % (mode,))
+ else:
+ if encoding is not None:
+ raise ValueError("Argument 'encoding' not supported in binary mode")
+ if errors is not None:
+ raise ValueError("Argument 'errors' not supported in binary mode")
+ if newline is not None:
+ raise ValueError("Argument 'newline' not supported in binary mode")
+
+ gz_mode = mode.replace("t", "")
+ if isinstance(filename, (str, bytes)):
+ binary_file = GzipFile(filename, gz_mode, compresslevel)
+ elif hasattr(filename, "read") or hasattr(filename, "write"):
+ binary_file = GzipFile(None, gz_mode, compresslevel, filename)
+ else:
+ raise TypeError("filename must be a str or bytes object, or a file")
+
+ if "t" in mode:
+ return io.TextIOWrapper(binary_file, encoding, errors, newline)
+ else:
+ return binary_file
def write32u(output, value):
# The L format writes the bit pattern correctly whether signed
@@ -36,15 +68,6 @@ def write32u(output, value):
def read32(input):
return struct.unpack("<I", input.read(4))[0]
-def open(filename, mode="rb", compresslevel=9):
- """Shorthand for GzipFile(filename, mode, compresslevel).
-
- The filename argument is required; mode defaults to 'rb'
- and compresslevel defaults to 9.
-
- """
- return GzipFile(filename, mode, compresslevel)
-
class _PaddedFile:
"""Minimal read-only file object that prepends a string to the contents
of an actual file. Shouldn't be used outside of gzip.py, as it lacks
@@ -106,7 +129,7 @@ class GzipFile(io.BufferedIOBase):
the exception of the readinto() and truncate() methods.
This class only supports opening files in binary mode. If you need to open a
- compressed file in text mode, wrap your GzipFile with an io.TextIOWrapper.
+ compressed file in text mode, use the gzip.open() function.
"""
@@ -153,7 +176,7 @@ class GzipFile(io.BufferedIOBase):
"""
if mode and ('t' in mode or 'U' in mode):
- raise IOError("Mode " + mode + " not supported")
+ raise ValueError("Invalid mode: {!r}".format(mode))
if mode and 'b' not in mode:
mode += 'b'
if fileobj is None:
@@ -163,10 +186,9 @@ class GzipFile(io.BufferedIOBase):
if not isinstance(filename, (str, bytes)):
filename = ''
if mode is None:
- if hasattr(fileobj, 'mode'): mode = fileobj.mode
- else: mode = 'rb'
+ mode = getattr(fileobj, 'mode', 'rb')
- if mode[0:1] == 'r':
+ if mode.startswith('r'):
self.mode = READ
# Set flag indicating start of a new member
self._new_member = True
@@ -181,7 +203,7 @@ class GzipFile(io.BufferedIOBase):
self.min_readsize = 100
fileobj = _PaddedFile(fileobj)
- elif mode[0:1] == 'w' or mode[0:1] == 'a':
+ elif mode.startswith(('w', 'a')):
self.mode = WRITE
self._init_write(filename)
self.compress = zlib.compressobj(compresslevel,
@@ -190,7 +212,7 @@ class GzipFile(io.BufferedIOBase):
zlib.DEF_MEM_LEVEL,
0)
else:
- raise IOError("Mode " + mode + " not supported")
+ raise ValueError("Invalid mode: {!r}".format(mode))
self.fileobj = fileobj
self.offset = 0
@@ -353,6 +375,31 @@ class GzipFile(io.BufferedIOBase):
self.offset += size
return chunk
+ def read1(self, size=-1):
+ self._check_closed()
+ if self.mode != READ:
+ import errno
+ raise IOError(errno.EBADF, "read1() on write-only GzipFile object")
+
+ if self.extrasize <= 0 and self.fileobj is None:
+ return b''
+
+ try:
+ # For certain input data, a single call to _read() may not return
+ # any data. In this case, retry until we get some data or reach EOF.
+ while self.extrasize <= 0:
+ self._read()
+ except EOFError:
+ pass
+ if size < 0 or size > self.extrasize:
+ size = self.extrasize
+
+ offset = self.offset - self.extrastart
+ chunk = self.extrabuf[offset: offset + size]
+ self.extrasize -= size
+ self.offset += size
+ return chunk
+
def peek(self, n):
if self.mode != READ:
import errno
@@ -366,8 +413,10 @@ class GzipFile(io.BufferedIOBase):
if self.fileobj is None:
return b''
try:
- # 1024 is the same buffering heuristic used in read()
- self._read(max(n, 1024))
+ # Ensure that we don't return b"" if we haven't reached EOF.
+ while self.extrasize == 0:
+ # 1024 is the same buffering heuristic used in read()
+ self._read(max(n, 1024))
except EOFError:
pass
offset = self.offset - self.extrastart
diff --git a/Lib/hmac.py b/Lib/hmac.py
index 956fc65..4297a71 100644
--- a/Lib/hmac.py
+++ b/Lib/hmac.py
@@ -4,6 +4,7 @@ Implements the HMAC algorithm as described by RFC 2104.
"""
import warnings as _warnings
+from operator import _compare_digest as compare_digest
trans_5C = bytes((x ^ 0x5C) for x in range(256))
trans_36 = bytes((x ^ 0x36) for x in range(256))
@@ -13,6 +14,7 @@ trans_36 = bytes((x ^ 0x36) for x in range(256))
digest_size = None
+
class HMAC:
"""RFC 2104 HMAC class. Also complies with RFC 4231.
@@ -33,7 +35,7 @@ class HMAC:
"""
if not isinstance(key, bytes):
- raise TypeError("expected bytes, but got %r" % type(key).__name__)
+ raise TypeError("key: expected bytes, but got %r" % type(key).__name__)
if digestmod is None:
import hashlib
diff --git a/Lib/html/entities.py b/Lib/html/entities.py
index e2b7bf1..ff72cf4 100644
--- a/Lib/html/entities.py
+++ b/Lib/html/entities.py
@@ -256,6 +256,2242 @@ name2codepoint = {
'zwnj': 0x200c, # zero width non-joiner, U+200C NEW RFC 2070
}
+
+# maps the HTML5 named character references to the equivalent Unicode character(s)
+html5 = {
+ 'Aacute;': '\xc1',
+ 'Aacute': '\xc1',
+ 'aacute;': '\xe1',
+ 'aacute': '\xe1',
+ 'Abreve;': '\u0102',
+ 'abreve;': '\u0103',
+ 'ac;': '\u223e',
+ 'acd;': '\u223f',
+ 'acE;': '\u223e\u0333',
+ 'Acirc;': '\xc2',
+ 'Acirc': '\xc2',
+ 'acirc;': '\xe2',
+ 'acirc': '\xe2',
+ 'acute;': '\xb4',
+ 'acute': '\xb4',
+ 'Acy;': '\u0410',
+ 'acy;': '\u0430',
+ 'AElig;': '\xc6',
+ 'AElig': '\xc6',
+ 'aelig;': '\xe6',
+ 'aelig': '\xe6',
+ 'af;': '\u2061',
+ 'Afr;': '\U0001d504',
+ 'afr;': '\U0001d51e',
+ 'Agrave;': '\xc0',
+ 'Agrave': '\xc0',
+ 'agrave;': '\xe0',
+ 'agrave': '\xe0',
+ 'alefsym;': '\u2135',
+ 'aleph;': '\u2135',
+ 'Alpha;': '\u0391',
+ 'alpha;': '\u03b1',
+ 'Amacr;': '\u0100',
+ 'amacr;': '\u0101',
+ 'amalg;': '\u2a3f',
+ 'AMP;': '&',
+ 'AMP': '&',
+ 'amp;': '&',
+ 'amp': '&',
+ 'And;': '\u2a53',
+ 'and;': '\u2227',
+ 'andand;': '\u2a55',
+ 'andd;': '\u2a5c',
+ 'andslope;': '\u2a58',
+ 'andv;': '\u2a5a',
+ 'ang;': '\u2220',
+ 'ange;': '\u29a4',
+ 'angle;': '\u2220',
+ 'angmsd;': '\u2221',
+ 'angmsdaa;': '\u29a8',
+ 'angmsdab;': '\u29a9',
+ 'angmsdac;': '\u29aa',
+ 'angmsdad;': '\u29ab',
+ 'angmsdae;': '\u29ac',
+ 'angmsdaf;': '\u29ad',
+ 'angmsdag;': '\u29ae',
+ 'angmsdah;': '\u29af',
+ 'angrt;': '\u221f',
+ 'angrtvb;': '\u22be',
+ 'angrtvbd;': '\u299d',
+ 'angsph;': '\u2222',
+ 'angst;': '\xc5',
+ 'angzarr;': '\u237c',
+ 'Aogon;': '\u0104',
+ 'aogon;': '\u0105',
+ 'Aopf;': '\U0001d538',
+ 'aopf;': '\U0001d552',
+ 'ap;': '\u2248',
+ 'apacir;': '\u2a6f',
+ 'apE;': '\u2a70',
+ 'ape;': '\u224a',
+ 'apid;': '\u224b',
+ 'apos;': "'",
+ 'ApplyFunction;': '\u2061',
+ 'approx;': '\u2248',
+ 'approxeq;': '\u224a',
+ 'Aring;': '\xc5',
+ 'Aring': '\xc5',
+ 'aring;': '\xe5',
+ 'aring': '\xe5',
+ 'Ascr;': '\U0001d49c',
+ 'ascr;': '\U0001d4b6',
+ 'Assign;': '\u2254',
+ 'ast;': '*',
+ 'asymp;': '\u2248',
+ 'asympeq;': '\u224d',
+ 'Atilde;': '\xc3',
+ 'Atilde': '\xc3',
+ 'atilde;': '\xe3',
+ 'atilde': '\xe3',
+ 'Auml;': '\xc4',
+ 'Auml': '\xc4',
+ 'auml;': '\xe4',
+ 'auml': '\xe4',
+ 'awconint;': '\u2233',
+ 'awint;': '\u2a11',
+ 'backcong;': '\u224c',
+ 'backepsilon;': '\u03f6',
+ 'backprime;': '\u2035',
+ 'backsim;': '\u223d',
+ 'backsimeq;': '\u22cd',
+ 'Backslash;': '\u2216',
+ 'Barv;': '\u2ae7',
+ 'barvee;': '\u22bd',
+ 'Barwed;': '\u2306',
+ 'barwed;': '\u2305',
+ 'barwedge;': '\u2305',
+ 'bbrk;': '\u23b5',
+ 'bbrktbrk;': '\u23b6',
+ 'bcong;': '\u224c',
+ 'Bcy;': '\u0411',
+ 'bcy;': '\u0431',
+ 'bdquo;': '\u201e',
+ 'becaus;': '\u2235',
+ 'Because;': '\u2235',
+ 'because;': '\u2235',
+ 'bemptyv;': '\u29b0',
+ 'bepsi;': '\u03f6',
+ 'bernou;': '\u212c',
+ 'Bernoullis;': '\u212c',
+ 'Beta;': '\u0392',
+ 'beta;': '\u03b2',
+ 'beth;': '\u2136',
+ 'between;': '\u226c',
+ 'Bfr;': '\U0001d505',
+ 'bfr;': '\U0001d51f',
+ 'bigcap;': '\u22c2',
+ 'bigcirc;': '\u25ef',
+ 'bigcup;': '\u22c3',
+ 'bigodot;': '\u2a00',
+ 'bigoplus;': '\u2a01',
+ 'bigotimes;': '\u2a02',
+ 'bigsqcup;': '\u2a06',
+ 'bigstar;': '\u2605',
+ 'bigtriangledown;': '\u25bd',
+ 'bigtriangleup;': '\u25b3',
+ 'biguplus;': '\u2a04',
+ 'bigvee;': '\u22c1',
+ 'bigwedge;': '\u22c0',
+ 'bkarow;': '\u290d',
+ 'blacklozenge;': '\u29eb',
+ 'blacksquare;': '\u25aa',
+ 'blacktriangle;': '\u25b4',
+ 'blacktriangledown;': '\u25be',
+ 'blacktriangleleft;': '\u25c2',
+ 'blacktriangleright;': '\u25b8',
+ 'blank;': '\u2423',
+ 'blk12;': '\u2592',
+ 'blk14;': '\u2591',
+ 'blk34;': '\u2593',
+ 'block;': '\u2588',
+ 'bne;': '=\u20e5',
+ 'bnequiv;': '\u2261\u20e5',
+ 'bNot;': '\u2aed',
+ 'bnot;': '\u2310',
+ 'Bopf;': '\U0001d539',
+ 'bopf;': '\U0001d553',
+ 'bot;': '\u22a5',
+ 'bottom;': '\u22a5',
+ 'bowtie;': '\u22c8',
+ 'boxbox;': '\u29c9',
+ 'boxDL;': '\u2557',
+ 'boxDl;': '\u2556',
+ 'boxdL;': '\u2555',
+ 'boxdl;': '\u2510',
+ 'boxDR;': '\u2554',
+ 'boxDr;': '\u2553',
+ 'boxdR;': '\u2552',
+ 'boxdr;': '\u250c',
+ 'boxH;': '\u2550',
+ 'boxh;': '\u2500',
+ 'boxHD;': '\u2566',
+ 'boxHd;': '\u2564',
+ 'boxhD;': '\u2565',
+ 'boxhd;': '\u252c',
+ 'boxHU;': '\u2569',
+ 'boxHu;': '\u2567',
+ 'boxhU;': '\u2568',
+ 'boxhu;': '\u2534',
+ 'boxminus;': '\u229f',
+ 'boxplus;': '\u229e',
+ 'boxtimes;': '\u22a0',
+ 'boxUL;': '\u255d',
+ 'boxUl;': '\u255c',
+ 'boxuL;': '\u255b',
+ 'boxul;': '\u2518',
+ 'boxUR;': '\u255a',
+ 'boxUr;': '\u2559',
+ 'boxuR;': '\u2558',
+ 'boxur;': '\u2514',
+ 'boxV;': '\u2551',
+ 'boxv;': '\u2502',
+ 'boxVH;': '\u256c',
+ 'boxVh;': '\u256b',
+ 'boxvH;': '\u256a',
+ 'boxvh;': '\u253c',
+ 'boxVL;': '\u2563',
+ 'boxVl;': '\u2562',
+ 'boxvL;': '\u2561',
+ 'boxvl;': '\u2524',
+ 'boxVR;': '\u2560',
+ 'boxVr;': '\u255f',
+ 'boxvR;': '\u255e',
+ 'boxvr;': '\u251c',
+ 'bprime;': '\u2035',
+ 'Breve;': '\u02d8',
+ 'breve;': '\u02d8',
+ 'brvbar;': '\xa6',
+ 'brvbar': '\xa6',
+ 'Bscr;': '\u212c',
+ 'bscr;': '\U0001d4b7',
+ 'bsemi;': '\u204f',
+ 'bsim;': '\u223d',
+ 'bsime;': '\u22cd',
+ 'bsol;': '\\',
+ 'bsolb;': '\u29c5',
+ 'bsolhsub;': '\u27c8',
+ 'bull;': '\u2022',
+ 'bullet;': '\u2022',
+ 'bump;': '\u224e',
+ 'bumpE;': '\u2aae',
+ 'bumpe;': '\u224f',
+ 'Bumpeq;': '\u224e',
+ 'bumpeq;': '\u224f',
+ 'Cacute;': '\u0106',
+ 'cacute;': '\u0107',
+ 'Cap;': '\u22d2',
+ 'cap;': '\u2229',
+ 'capand;': '\u2a44',
+ 'capbrcup;': '\u2a49',
+ 'capcap;': '\u2a4b',
+ 'capcup;': '\u2a47',
+ 'capdot;': '\u2a40',
+ 'CapitalDifferentialD;': '\u2145',
+ 'caps;': '\u2229\ufe00',
+ 'caret;': '\u2041',
+ 'caron;': '\u02c7',
+ 'Cayleys;': '\u212d',
+ 'ccaps;': '\u2a4d',
+ 'Ccaron;': '\u010c',
+ 'ccaron;': '\u010d',
+ 'Ccedil;': '\xc7',
+ 'Ccedil': '\xc7',
+ 'ccedil;': '\xe7',
+ 'ccedil': '\xe7',
+ 'Ccirc;': '\u0108',
+ 'ccirc;': '\u0109',
+ 'Cconint;': '\u2230',
+ 'ccups;': '\u2a4c',
+ 'ccupssm;': '\u2a50',
+ 'Cdot;': '\u010a',
+ 'cdot;': '\u010b',
+ 'cedil;': '\xb8',
+ 'cedil': '\xb8',
+ 'Cedilla;': '\xb8',
+ 'cemptyv;': '\u29b2',
+ 'cent;': '\xa2',
+ 'cent': '\xa2',
+ 'CenterDot;': '\xb7',
+ 'centerdot;': '\xb7',
+ 'Cfr;': '\u212d',
+ 'cfr;': '\U0001d520',
+ 'CHcy;': '\u0427',
+ 'chcy;': '\u0447',
+ 'check;': '\u2713',
+ 'checkmark;': '\u2713',
+ 'Chi;': '\u03a7',
+ 'chi;': '\u03c7',
+ 'cir;': '\u25cb',
+ 'circ;': '\u02c6',
+ 'circeq;': '\u2257',
+ 'circlearrowleft;': '\u21ba',
+ 'circlearrowright;': '\u21bb',
+ 'circledast;': '\u229b',
+ 'circledcirc;': '\u229a',
+ 'circleddash;': '\u229d',
+ 'CircleDot;': '\u2299',
+ 'circledR;': '\xae',
+ 'circledS;': '\u24c8',
+ 'CircleMinus;': '\u2296',
+ 'CirclePlus;': '\u2295',
+ 'CircleTimes;': '\u2297',
+ 'cirE;': '\u29c3',
+ 'cire;': '\u2257',
+ 'cirfnint;': '\u2a10',
+ 'cirmid;': '\u2aef',
+ 'cirscir;': '\u29c2',
+ 'ClockwiseContourIntegral;': '\u2232',
+ 'CloseCurlyDoubleQuote;': '\u201d',
+ 'CloseCurlyQuote;': '\u2019',
+ 'clubs;': '\u2663',
+ 'clubsuit;': '\u2663',
+ 'Colon;': '\u2237',
+ 'colon;': ':',
+ 'Colone;': '\u2a74',
+ 'colone;': '\u2254',
+ 'coloneq;': '\u2254',
+ 'comma;': ',',
+ 'commat;': '@',
+ 'comp;': '\u2201',
+ 'compfn;': '\u2218',
+ 'complement;': '\u2201',
+ 'complexes;': '\u2102',
+ 'cong;': '\u2245',
+ 'congdot;': '\u2a6d',
+ 'Congruent;': '\u2261',
+ 'Conint;': '\u222f',
+ 'conint;': '\u222e',
+ 'ContourIntegral;': '\u222e',
+ 'Copf;': '\u2102',
+ 'copf;': '\U0001d554',
+ 'coprod;': '\u2210',
+ 'Coproduct;': '\u2210',
+ 'COPY;': '\xa9',
+ 'COPY': '\xa9',
+ 'copy;': '\xa9',
+ 'copy': '\xa9',
+ 'copysr;': '\u2117',
+ 'CounterClockwiseContourIntegral;': '\u2233',
+ 'crarr;': '\u21b5',
+ 'Cross;': '\u2a2f',
+ 'cross;': '\u2717',
+ 'Cscr;': '\U0001d49e',
+ 'cscr;': '\U0001d4b8',
+ 'csub;': '\u2acf',
+ 'csube;': '\u2ad1',
+ 'csup;': '\u2ad0',
+ 'csupe;': '\u2ad2',
+ 'ctdot;': '\u22ef',
+ 'cudarrl;': '\u2938',
+ 'cudarrr;': '\u2935',
+ 'cuepr;': '\u22de',
+ 'cuesc;': '\u22df',
+ 'cularr;': '\u21b6',
+ 'cularrp;': '\u293d',
+ 'Cup;': '\u22d3',
+ 'cup;': '\u222a',
+ 'cupbrcap;': '\u2a48',
+ 'CupCap;': '\u224d',
+ 'cupcap;': '\u2a46',
+ 'cupcup;': '\u2a4a',
+ 'cupdot;': '\u228d',
+ 'cupor;': '\u2a45',
+ 'cups;': '\u222a\ufe00',
+ 'curarr;': '\u21b7',
+ 'curarrm;': '\u293c',
+ 'curlyeqprec;': '\u22de',
+ 'curlyeqsucc;': '\u22df',
+ 'curlyvee;': '\u22ce',
+ 'curlywedge;': '\u22cf',
+ 'curren;': '\xa4',
+ 'curren': '\xa4',
+ 'curvearrowleft;': '\u21b6',
+ 'curvearrowright;': '\u21b7',
+ 'cuvee;': '\u22ce',
+ 'cuwed;': '\u22cf',
+ 'cwconint;': '\u2232',
+ 'cwint;': '\u2231',
+ 'cylcty;': '\u232d',
+ 'Dagger;': '\u2021',
+ 'dagger;': '\u2020',
+ 'daleth;': '\u2138',
+ 'Darr;': '\u21a1',
+ 'dArr;': '\u21d3',
+ 'darr;': '\u2193',
+ 'dash;': '\u2010',
+ 'Dashv;': '\u2ae4',
+ 'dashv;': '\u22a3',
+ 'dbkarow;': '\u290f',
+ 'dblac;': '\u02dd',
+ 'Dcaron;': '\u010e',
+ 'dcaron;': '\u010f',
+ 'Dcy;': '\u0414',
+ 'dcy;': '\u0434',
+ 'DD;': '\u2145',
+ 'dd;': '\u2146',
+ 'ddagger;': '\u2021',
+ 'ddarr;': '\u21ca',
+ 'DDotrahd;': '\u2911',
+ 'ddotseq;': '\u2a77',
+ 'deg;': '\xb0',
+ 'deg': '\xb0',
+ 'Del;': '\u2207',
+ 'Delta;': '\u0394',
+ 'delta;': '\u03b4',
+ 'demptyv;': '\u29b1',
+ 'dfisht;': '\u297f',
+ 'Dfr;': '\U0001d507',
+ 'dfr;': '\U0001d521',
+ 'dHar;': '\u2965',
+ 'dharl;': '\u21c3',
+ 'dharr;': '\u21c2',
+ 'DiacriticalAcute;': '\xb4',
+ 'DiacriticalDot;': '\u02d9',
+ 'DiacriticalDoubleAcute;': '\u02dd',
+ 'DiacriticalGrave;': '`',
+ 'DiacriticalTilde;': '\u02dc',
+ 'diam;': '\u22c4',
+ 'Diamond;': '\u22c4',
+ 'diamond;': '\u22c4',
+ 'diamondsuit;': '\u2666',
+ 'diams;': '\u2666',
+ 'die;': '\xa8',
+ 'DifferentialD;': '\u2146',
+ 'digamma;': '\u03dd',
+ 'disin;': '\u22f2',
+ 'div;': '\xf7',
+ 'divide;': '\xf7',
+ 'divide': '\xf7',
+ 'divideontimes;': '\u22c7',
+ 'divonx;': '\u22c7',
+ 'DJcy;': '\u0402',
+ 'djcy;': '\u0452',
+ 'dlcorn;': '\u231e',
+ 'dlcrop;': '\u230d',
+ 'dollar;': '$',
+ 'Dopf;': '\U0001d53b',
+ 'dopf;': '\U0001d555',
+ 'Dot;': '\xa8',
+ 'dot;': '\u02d9',
+ 'DotDot;': '\u25cc\u20dc',
+ 'doteq;': '\u2250',
+ 'doteqdot;': '\u2251',
+ 'DotEqual;': '\u2250',
+ 'dotminus;': '\u2238',
+ 'dotplus;': '\u2214',
+ 'dotsquare;': '\u22a1',
+ 'doublebarwedge;': '\u2306',
+ 'DoubleContourIntegral;': '\u222f',
+ 'DoubleDot;': '\xa8',
+ 'DoubleDownArrow;': '\u21d3',
+ 'DoubleLeftArrow;': '\u21d0',
+ 'DoubleLeftRightArrow;': '\u21d4',
+ 'DoubleLeftTee;': '\u2ae4',
+ 'DoubleLongLeftArrow;': '\u27f8',
+ 'DoubleLongLeftRightArrow;': '\u27fa',
+ 'DoubleLongRightArrow;': '\u27f9',
+ 'DoubleRightArrow;': '\u21d2',
+ 'DoubleRightTee;': '\u22a8',
+ 'DoubleUpArrow;': '\u21d1',
+ 'DoubleUpDownArrow;': '\u21d5',
+ 'DoubleVerticalBar;': '\u2225',
+ 'DownArrow;': '\u2193',
+ 'Downarrow;': '\u21d3',
+ 'downarrow;': '\u2193',
+ 'DownArrowBar;': '\u2913',
+ 'DownArrowUpArrow;': '\u21f5',
+ 'DownBreve;': '\u25cc\u0311',
+ 'downdownarrows;': '\u21ca',
+ 'downharpoonleft;': '\u21c3',
+ 'downharpoonright;': '\u21c2',
+ 'DownLeftRightVector;': '\u2950',
+ 'DownLeftTeeVector;': '\u295e',
+ 'DownLeftVector;': '\u21bd',
+ 'DownLeftVectorBar;': '\u2956',
+ 'DownRightTeeVector;': '\u295f',
+ 'DownRightVector;': '\u21c1',
+ 'DownRightVectorBar;': '\u2957',
+ 'DownTee;': '\u22a4',
+ 'DownTeeArrow;': '\u21a7',
+ 'drbkarow;': '\u2910',
+ 'drcorn;': '\u231f',
+ 'drcrop;': '\u230c',
+ 'Dscr;': '\U0001d49f',
+ 'dscr;': '\U0001d4b9',
+ 'DScy;': '\u0405',
+ 'dscy;': '\u0455',
+ 'dsol;': '\u29f6',
+ 'Dstrok;': '\u0110',
+ 'dstrok;': '\u0111',
+ 'dtdot;': '\u22f1',
+ 'dtri;': '\u25bf',
+ 'dtrif;': '\u25be',
+ 'duarr;': '\u21f5',
+ 'duhar;': '\u296f',
+ 'dwangle;': '\u29a6',
+ 'DZcy;': '\u040f',
+ 'dzcy;': '\u045f',
+ 'dzigrarr;': '\u27ff',
+ 'Eacute;': '\xc9',
+ 'Eacute': '\xc9',
+ 'eacute;': '\xe9',
+ 'eacute': '\xe9',
+ 'easter;': '\u2a6e',
+ 'Ecaron;': '\u011a',
+ 'ecaron;': '\u011b',
+ 'ecir;': '\u2256',
+ 'Ecirc;': '\xca',
+ 'Ecirc': '\xca',
+ 'ecirc;': '\xea',
+ 'ecirc': '\xea',
+ 'ecolon;': '\u2255',
+ 'Ecy;': '\u042d',
+ 'ecy;': '\u044d',
+ 'eDDot;': '\u2a77',
+ 'Edot;': '\u0116',
+ 'eDot;': '\u2251',
+ 'edot;': '\u0117',
+ 'ee;': '\u2147',
+ 'efDot;': '\u2252',
+ 'Efr;': '\U0001d508',
+ 'efr;': '\U0001d522',
+ 'eg;': '\u2a9a',
+ 'Egrave;': '\xc8',
+ 'Egrave': '\xc8',
+ 'egrave;': '\xe8',
+ 'egrave': '\xe8',
+ 'egs;': '\u2a96',
+ 'egsdot;': '\u2a98',
+ 'el;': '\u2a99',
+ 'Element;': '\u2208',
+ 'elinters;': '\u23e7',
+ 'ell;': '\u2113',
+ 'els;': '\u2a95',
+ 'elsdot;': '\u2a97',
+ 'Emacr;': '\u0112',
+ 'emacr;': '\u0113',
+ 'empty;': '\u2205',
+ 'emptyset;': '\u2205',
+ 'EmptySmallSquare;': '\u25fb',
+ 'emptyv;': '\u2205',
+ 'EmptyVerySmallSquare;': '\u25ab',
+ 'emsp;': '\u2003',
+ 'emsp13;': '\u2004',
+ 'emsp14;': '\u2005',
+ 'ENG;': '\u014a',
+ 'eng;': '\u014b',
+ 'ensp;': '\u2002',
+ 'Eogon;': '\u0118',
+ 'eogon;': '\u0119',
+ 'Eopf;': '\U0001d53c',
+ 'eopf;': '\U0001d556',
+ 'epar;': '\u22d5',
+ 'eparsl;': '\u29e3',
+ 'eplus;': '\u2a71',
+ 'epsi;': '\u03b5',
+ 'Epsilon;': '\u0395',
+ 'epsilon;': '\u03b5',
+ 'epsiv;': '\u03f5',
+ 'eqcirc;': '\u2256',
+ 'eqcolon;': '\u2255',
+ 'eqsim;': '\u2242',
+ 'eqslantgtr;': '\u2a96',
+ 'eqslantless;': '\u2a95',
+ 'Equal;': '\u2a75',
+ 'equals;': '=',
+ 'EqualTilde;': '\u2242',
+ 'equest;': '\u225f',
+ 'Equilibrium;': '\u21cc',
+ 'equiv;': '\u2261',
+ 'equivDD;': '\u2a78',
+ 'eqvparsl;': '\u29e5',
+ 'erarr;': '\u2971',
+ 'erDot;': '\u2253',
+ 'Escr;': '\u2130',
+ 'escr;': '\u212f',
+ 'esdot;': '\u2250',
+ 'Esim;': '\u2a73',
+ 'esim;': '\u2242',
+ 'Eta;': '\u0397',
+ 'eta;': '\u03b7',
+ 'ETH;': '\xd0',
+ 'ETH': '\xd0',
+ 'eth;': '\xf0',
+ 'eth': '\xf0',
+ 'Euml;': '\xcb',
+ 'Euml': '\xcb',
+ 'euml;': '\xeb',
+ 'euml': '\xeb',
+ 'euro;': '\u20ac',
+ 'excl;': '!',
+ 'exist;': '\u2203',
+ 'Exists;': '\u2203',
+ 'expectation;': '\u2130',
+ 'ExponentialE;': '\u2147',
+ 'exponentiale;': '\u2147',
+ 'fallingdotseq;': '\u2252',
+ 'Fcy;': '\u0424',
+ 'fcy;': '\u0444',
+ 'female;': '\u2640',
+ 'ffilig;': '\ufb03',
+ 'fflig;': '\ufb00',
+ 'ffllig;': '\ufb04',
+ 'Ffr;': '\U0001d509',
+ 'ffr;': '\U0001d523',
+ 'filig;': '\ufb01',
+ 'FilledSmallSquare;': '\u25fc',
+ 'FilledVerySmallSquare;': '\u25aa',
+ 'fjlig;': 'fj',
+ 'flat;': '\u266d',
+ 'fllig;': '\ufb02',
+ 'fltns;': '\u25b1',
+ 'fnof;': '\u0192',
+ 'Fopf;': '\U0001d53d',
+ 'fopf;': '\U0001d557',
+ 'ForAll;': '\u2200',
+ 'forall;': '\u2200',
+ 'fork;': '\u22d4',
+ 'forkv;': '\u2ad9',
+ 'Fouriertrf;': '\u2131',
+ 'fpartint;': '\u2a0d',
+ 'frac12;': '\xbd',
+ 'frac12': '\xbd',
+ 'frac13;': '\u2153',
+ 'frac14;': '\xbc',
+ 'frac14': '\xbc',
+ 'frac15;': '\u2155',
+ 'frac16;': '\u2159',
+ 'frac18;': '\u215b',
+ 'frac23;': '\u2154',
+ 'frac25;': '\u2156',
+ 'frac34;': '\xbe',
+ 'frac34': '\xbe',
+ 'frac35;': '\u2157',
+ 'frac38;': '\u215c',
+ 'frac45;': '\u2158',
+ 'frac56;': '\u215a',
+ 'frac58;': '\u215d',
+ 'frac78;': '\u215e',
+ 'frasl;': '\u2044',
+ 'frown;': '\u2322',
+ 'Fscr;': '\u2131',
+ 'fscr;': '\U0001d4bb',
+ 'gacute;': '\u01f5',
+ 'Gamma;': '\u0393',
+ 'gamma;': '\u03b3',
+ 'Gammad;': '\u03dc',
+ 'gammad;': '\u03dd',
+ 'gap;': '\u2a86',
+ 'Gbreve;': '\u011e',
+ 'gbreve;': '\u011f',
+ 'Gcedil;': '\u0122',
+ 'Gcirc;': '\u011c',
+ 'gcirc;': '\u011d',
+ 'Gcy;': '\u0413',
+ 'gcy;': '\u0433',
+ 'Gdot;': '\u0120',
+ 'gdot;': '\u0121',
+ 'gE;': '\u2267',
+ 'ge;': '\u2265',
+ 'gEl;': '\u2a8c',
+ 'gel;': '\u22db',
+ 'geq;': '\u2265',
+ 'geqq;': '\u2267',
+ 'geqslant;': '\u2a7e',
+ 'ges;': '\u2a7e',
+ 'gescc;': '\u2aa9',
+ 'gesdot;': '\u2a80',
+ 'gesdoto;': '\u2a82',
+ 'gesdotol;': '\u2a84',
+ 'gesl;': '\u22db\ufe00',
+ 'gesles;': '\u2a94',
+ 'Gfr;': '\U0001d50a',
+ 'gfr;': '\U0001d524',
+ 'Gg;': '\u22d9',
+ 'gg;': '\u226b',
+ 'ggg;': '\u22d9',
+ 'gimel;': '\u2137',
+ 'GJcy;': '\u0403',
+ 'gjcy;': '\u0453',
+ 'gl;': '\u2277',
+ 'gla;': '\u2aa5',
+ 'glE;': '\u2a92',
+ 'glj;': '\u2aa4',
+ 'gnap;': '\u2a8a',
+ 'gnapprox;': '\u2a8a',
+ 'gnE;': '\u2269',
+ 'gne;': '\u2a88',
+ 'gneq;': '\u2a88',
+ 'gneqq;': '\u2269',
+ 'gnsim;': '\u22e7',
+ 'Gopf;': '\U0001d53e',
+ 'gopf;': '\U0001d558',
+ 'grave;': '`',
+ 'GreaterEqual;': '\u2265',
+ 'GreaterEqualLess;': '\u22db',
+ 'GreaterFullEqual;': '\u2267',
+ 'GreaterGreater;': '\u2aa2',
+ 'GreaterLess;': '\u2277',
+ 'GreaterSlantEqual;': '\u2a7e',
+ 'GreaterTilde;': '\u2273',
+ 'Gscr;': '\U0001d4a2',
+ 'gscr;': '\u210a',
+ 'gsim;': '\u2273',
+ 'gsime;': '\u2a8e',
+ 'gsiml;': '\u2a90',
+ 'GT;': '>',
+ 'GT': '>',
+ 'Gt;': '\u226b',
+ 'gt;': '>',
+ 'gt': '>',
+ 'gtcc;': '\u2aa7',
+ 'gtcir;': '\u2a7a',
+ 'gtdot;': '\u22d7',
+ 'gtlPar;': '\u2995',
+ 'gtquest;': '\u2a7c',
+ 'gtrapprox;': '\u2a86',
+ 'gtrarr;': '\u2978',
+ 'gtrdot;': '\u22d7',
+ 'gtreqless;': '\u22db',
+ 'gtreqqless;': '\u2a8c',
+ 'gtrless;': '\u2277',
+ 'gtrsim;': '\u2273',
+ 'gvertneqq;': '\u2269\ufe00',
+ 'gvnE;': '\u2269\ufe00',
+ 'Hacek;': '\u02c7',
+ 'hairsp;': '\u200a',
+ 'half;': '\xbd',
+ 'hamilt;': '\u210b',
+ 'HARDcy;': '\u042a',
+ 'hardcy;': '\u044a',
+ 'hArr;': '\u21d4',
+ 'harr;': '\u2194',
+ 'harrcir;': '\u2948',
+ 'harrw;': '\u21ad',
+ 'Hat;': '^',
+ 'hbar;': '\u210f',
+ 'Hcirc;': '\u0124',
+ 'hcirc;': '\u0125',
+ 'hearts;': '\u2665',
+ 'heartsuit;': '\u2665',
+ 'hellip;': '\u2026',
+ 'hercon;': '\u22b9',
+ 'Hfr;': '\u210c',
+ 'hfr;': '\U0001d525',
+ 'HilbertSpace;': '\u210b',
+ 'hksearow;': '\u2925',
+ 'hkswarow;': '\u2926',
+ 'hoarr;': '\u21ff',
+ 'homtht;': '\u223b',
+ 'hookleftarrow;': '\u21a9',
+ 'hookrightarrow;': '\u21aa',
+ 'Hopf;': '\u210d',
+ 'hopf;': '\U0001d559',
+ 'horbar;': '\u2015',
+ 'HorizontalLine;': '\u2500',
+ 'Hscr;': '\u210b',
+ 'hscr;': '\U0001d4bd',
+ 'hslash;': '\u210f',
+ 'Hstrok;': '\u0126',
+ 'hstrok;': '\u0127',
+ 'HumpDownHump;': '\u224e',
+ 'HumpEqual;': '\u224f',
+ 'hybull;': '\u2043',
+ 'hyphen;': '\u2010',
+ 'Iacute;': '\xcd',
+ 'Iacute': '\xcd',
+ 'iacute;': '\xed',
+ 'iacute': '\xed',
+ 'ic;': '\u2063',
+ 'Icirc;': '\xce',
+ 'Icirc': '\xce',
+ 'icirc;': '\xee',
+ 'icirc': '\xee',
+ 'Icy;': '\u0418',
+ 'icy;': '\u0438',
+ 'Idot;': '\u0130',
+ 'IEcy;': '\u0415',
+ 'iecy;': '\u0435',
+ 'iexcl;': '\xa1',
+ 'iexcl': '\xa1',
+ 'iff;': '\u21d4',
+ 'Ifr;': '\u2111',
+ 'ifr;': '\U0001d526',
+ 'Igrave;': '\xcc',
+ 'Igrave': '\xcc',
+ 'igrave;': '\xec',
+ 'igrave': '\xec',
+ 'ii;': '\u2148',
+ 'iiiint;': '\u2a0c',
+ 'iiint;': '\u222d',
+ 'iinfin;': '\u29dc',
+ 'iiota;': '\u2129',
+ 'IJlig;': '\u0132',
+ 'ijlig;': '\u0133',
+ 'Im;': '\u2111',
+ 'Imacr;': '\u012a',
+ 'imacr;': '\u012b',
+ 'image;': '\u2111',
+ 'ImaginaryI;': '\u2148',
+ 'imagline;': '\u2110',
+ 'imagpart;': '\u2111',
+ 'imath;': '\u0131',
+ 'imof;': '\u22b7',
+ 'imped;': '\u01b5',
+ 'Implies;': '\u21d2',
+ 'in;': '\u2208',
+ 'incare;': '\u2105',
+ 'infin;': '\u221e',
+ 'infintie;': '\u29dd',
+ 'inodot;': '\u0131',
+ 'Int;': '\u222c',
+ 'int;': '\u222b',
+ 'intcal;': '\u22ba',
+ 'integers;': '\u2124',
+ 'Integral;': '\u222b',
+ 'intercal;': '\u22ba',
+ 'Intersection;': '\u22c2',
+ 'intlarhk;': '\u2a17',
+ 'intprod;': '\u2a3c',
+ 'InvisibleComma;': '\u2063',
+ 'InvisibleTimes;': '\u2062',
+ 'IOcy;': '\u0401',
+ 'iocy;': '\u0451',
+ 'Iogon;': '\u012e',
+ 'iogon;': '\u012f',
+ 'Iopf;': '\U0001d540',
+ 'iopf;': '\U0001d55a',
+ 'Iota;': '\u0399',
+ 'iota;': '\u03b9',
+ 'iprod;': '\u2a3c',
+ 'iquest;': '\xbf',
+ 'iquest': '\xbf',
+ 'Iscr;': '\u2110',
+ 'iscr;': '\U0001d4be',
+ 'isin;': '\u2208',
+ 'isindot;': '\u22f5',
+ 'isinE;': '\u22f9',
+ 'isins;': '\u22f4',
+ 'isinsv;': '\u22f3',
+ 'isinv;': '\u2208',
+ 'it;': '\u2062',
+ 'Itilde;': '\u0128',
+ 'itilde;': '\u0129',
+ 'Iukcy;': '\u0406',
+ 'iukcy;': '\u0456',
+ 'Iuml;': '\xcf',
+ 'Iuml': '\xcf',
+ 'iuml;': '\xef',
+ 'iuml': '\xef',
+ 'Jcirc;': '\u0134',
+ 'jcirc;': '\u0135',
+ 'Jcy;': '\u0419',
+ 'jcy;': '\u0439',
+ 'Jfr;': '\U0001d50d',
+ 'jfr;': '\U0001d527',
+ 'jmath;': '\u0237',
+ 'Jopf;': '\U0001d541',
+ 'jopf;': '\U0001d55b',
+ 'Jscr;': '\U0001d4a5',
+ 'jscr;': '\U0001d4bf',
+ 'Jsercy;': '\u0408',
+ 'jsercy;': '\u0458',
+ 'Jukcy;': '\u0404',
+ 'jukcy;': '\u0454',
+ 'Kappa;': '\u039a',
+ 'kappa;': '\u03ba',
+ 'kappav;': '\u03f0',
+ 'Kcedil;': '\u0136',
+ 'kcedil;': '\u0137',
+ 'Kcy;': '\u041a',
+ 'kcy;': '\u043a',
+ 'Kfr;': '\U0001d50e',
+ 'kfr;': '\U0001d528',
+ 'kgreen;': '\u0138',
+ 'KHcy;': '\u0425',
+ 'khcy;': '\u0445',
+ 'KJcy;': '\u040c',
+ 'kjcy;': '\u045c',
+ 'Kopf;': '\U0001d542',
+ 'kopf;': '\U0001d55c',
+ 'Kscr;': '\U0001d4a6',
+ 'kscr;': '\U0001d4c0',
+ 'lAarr;': '\u21da',
+ 'Lacute;': '\u0139',
+ 'lacute;': '\u013a',
+ 'laemptyv;': '\u29b4',
+ 'lagran;': '\u2112',
+ 'Lambda;': '\u039b',
+ 'lambda;': '\u03bb',
+ 'Lang;': '\u27ea',
+ 'lang;': '\u2329',
+ 'langd;': '\u2991',
+ 'langle;': '\u2329',
+ 'lap;': '\u2a85',
+ 'Laplacetrf;': '\u2112',
+ 'laquo;': '\xab',
+ 'laquo': '\xab',
+ 'Larr;': '\u219e',
+ 'lArr;': '\u21d0',
+ 'larr;': '\u2190',
+ 'larrb;': '\u21e4',
+ 'larrbfs;': '\u291f',
+ 'larrfs;': '\u291d',
+ 'larrhk;': '\u21a9',
+ 'larrlp;': '\u21ab',
+ 'larrpl;': '\u2939',
+ 'larrsim;': '\u2973',
+ 'larrtl;': '\u21a2',
+ 'lat;': '\u2aab',
+ 'lAtail;': '\u291b',
+ 'latail;': '\u2919',
+ 'late;': '\u2aad',
+ 'lates;': '\u2aad\ufe00',
+ 'lBarr;': '\u290e',
+ 'lbarr;': '\u290c',
+ 'lbbrk;': '\u2772',
+ 'lbrace;': '{',
+ 'lbrack;': '[',
+ 'lbrke;': '\u298b',
+ 'lbrksld;': '\u298f',
+ 'lbrkslu;': '\u298d',
+ 'Lcaron;': '\u013d',
+ 'lcaron;': '\u013e',
+ 'Lcedil;': '\u013b',
+ 'lcedil;': '\u013c',
+ 'lceil;': '\u2308',
+ 'lcub;': '{',
+ 'Lcy;': '\u041b',
+ 'lcy;': '\u043b',
+ 'ldca;': '\u2936',
+ 'ldquo;': '\u201c',
+ 'ldquor;': '\u201e',
+ 'ldrdhar;': '\u2967',
+ 'ldrushar;': '\u294b',
+ 'ldsh;': '\u21b2',
+ 'lE;': '\u2266',
+ 'le;': '\u2264',
+ 'LeftAngleBracket;': '\u2329',
+ 'LeftArrow;': '\u2190',
+ 'Leftarrow;': '\u21d0',
+ 'leftarrow;': '\u2190',
+ 'LeftArrowBar;': '\u21e4',
+ 'LeftArrowRightArrow;': '\u21c6',
+ 'leftarrowtail;': '\u21a2',
+ 'LeftCeiling;': '\u2308',
+ 'LeftDoubleBracket;': '\u27e6',
+ 'LeftDownTeeVector;': '\u2961',
+ 'LeftDownVector;': '\u21c3',
+ 'LeftDownVectorBar;': '\u2959',
+ 'LeftFloor;': '\u230a',
+ 'leftharpoondown;': '\u21bd',
+ 'leftharpoonup;': '\u21bc',
+ 'leftleftarrows;': '\u21c7',
+ 'LeftRightArrow;': '\u2194',
+ 'Leftrightarrow;': '\u21d4',
+ 'leftrightarrow;': '\u2194',
+ 'leftrightarrows;': '\u21c6',
+ 'leftrightharpoons;': '\u21cb',
+ 'leftrightsquigarrow;': '\u21ad',
+ 'LeftRightVector;': '\u294e',
+ 'LeftTee;': '\u22a3',
+ 'LeftTeeArrow;': '\u21a4',
+ 'LeftTeeVector;': '\u295a',
+ 'leftthreetimes;': '\u22cb',
+ 'LeftTriangle;': '\u22b2',
+ 'LeftTriangleBar;': '\u29cf',
+ 'LeftTriangleEqual;': '\u22b4',
+ 'LeftUpDownVector;': '\u2951',
+ 'LeftUpTeeVector;': '\u2960',
+ 'LeftUpVector;': '\u21bf',
+ 'LeftUpVectorBar;': '\u2958',
+ 'LeftVector;': '\u21bc',
+ 'LeftVectorBar;': '\u2952',
+ 'lEg;': '\u2a8b',
+ 'leg;': '\u22da',
+ 'leq;': '\u2264',
+ 'leqq;': '\u2266',
+ 'leqslant;': '\u2a7d',
+ 'les;': '\u2a7d',
+ 'lescc;': '\u2aa8',
+ 'lesdot;': '\u2a7f',
+ 'lesdoto;': '\u2a81',
+ 'lesdotor;': '\u2a83',
+ 'lesg;': '\u22da\ufe00',
+ 'lesges;': '\u2a93',
+ 'lessapprox;': '\u2a85',
+ 'lessdot;': '\u22d6',
+ 'lesseqgtr;': '\u22da',
+ 'lesseqqgtr;': '\u2a8b',
+ 'LessEqualGreater;': '\u22da',
+ 'LessFullEqual;': '\u2266',
+ 'LessGreater;': '\u2276',
+ 'lessgtr;': '\u2276',
+ 'LessLess;': '\u2aa1',
+ 'lesssim;': '\u2272',
+ 'LessSlantEqual;': '\u2a7d',
+ 'LessTilde;': '\u2272',
+ 'lfisht;': '\u297c',
+ 'lfloor;': '\u230a',
+ 'Lfr;': '\U0001d50f',
+ 'lfr;': '\U0001d529',
+ 'lg;': '\u2276',
+ 'lgE;': '\u2a91',
+ 'lHar;': '\u2962',
+ 'lhard;': '\u21bd',
+ 'lharu;': '\u21bc',
+ 'lharul;': '\u296a',
+ 'lhblk;': '\u2584',
+ 'LJcy;': '\u0409',
+ 'ljcy;': '\u0459',
+ 'Ll;': '\u22d8',
+ 'll;': '\u226a',
+ 'llarr;': '\u21c7',
+ 'llcorner;': '\u231e',
+ 'Lleftarrow;': '\u21da',
+ 'llhard;': '\u296b',
+ 'lltri;': '\u25fa',
+ 'Lmidot;': '\u013f',
+ 'lmidot;': '\u0140',
+ 'lmoust;': '\u23b0',
+ 'lmoustache;': '\u23b0',
+ 'lnap;': '\u2a89',
+ 'lnapprox;': '\u2a89',
+ 'lnE;': '\u2268',
+ 'lne;': '\u2a87',
+ 'lneq;': '\u2a87',
+ 'lneqq;': '\u2268',
+ 'lnsim;': '\u22e6',
+ 'loang;': '\u27ec',
+ 'loarr;': '\u21fd',
+ 'lobrk;': '\u27e6',
+ 'LongLeftArrow;': '\u27f5',
+ 'Longleftarrow;': '\u27f8',
+ 'longleftarrow;': '\u27f5',
+ 'LongLeftRightArrow;': '\u27f7',
+ 'Longleftrightarrow;': '\u27fa',
+ 'longleftrightarrow;': '\u27f7',
+ 'longmapsto;': '\u27fc',
+ 'LongRightArrow;': '\u27f6',
+ 'Longrightarrow;': '\u27f9',
+ 'longrightarrow;': '\u27f6',
+ 'looparrowleft;': '\u21ab',
+ 'looparrowright;': '\u21ac',
+ 'lopar;': '\u2985',
+ 'Lopf;': '\U0001d543',
+ 'lopf;': '\U0001d55d',
+ 'loplus;': '\u2a2d',
+ 'lotimes;': '\u2a34',
+ 'lowast;': '\u2217',
+ 'lowbar;': '_',
+ 'LowerLeftArrow;': '\u2199',
+ 'LowerRightArrow;': '\u2198',
+ 'loz;': '\u25ca',
+ 'lozenge;': '\u25ca',
+ 'lozf;': '\u29eb',
+ 'lpar;': '(',
+ 'lparlt;': '\u2993',
+ 'lrarr;': '\u21c6',
+ 'lrcorner;': '\u231f',
+ 'lrhar;': '\u21cb',
+ 'lrhard;': '\u296d',
+ 'lrm;': '\u200e',
+ 'lrtri;': '\u22bf',
+ 'lsaquo;': '\u2039',
+ 'Lscr;': '\u2112',
+ 'lscr;': '\U0001d4c1',
+ 'Lsh;': '\u21b0',
+ 'lsh;': '\u21b0',
+ 'lsim;': '\u2272',
+ 'lsime;': '\u2a8d',
+ 'lsimg;': '\u2a8f',
+ 'lsqb;': '[',
+ 'lsquo;': '\u2018',
+ 'lsquor;': '\u201a',
+ 'Lstrok;': '\u0141',
+ 'lstrok;': '\u0142',
+ 'LT;': '<',
+ 'LT': '<',
+ 'Lt;': '\u226a',
+ 'lt;': '<',
+ 'lt': '<',
+ 'ltcc;': '\u2aa6',
+ 'ltcir;': '\u2a79',
+ 'ltdot;': '\u22d6',
+ 'lthree;': '\u22cb',
+ 'ltimes;': '\u22c9',
+ 'ltlarr;': '\u2976',
+ 'ltquest;': '\u2a7b',
+ 'ltri;': '\u25c3',
+ 'ltrie;': '\u22b4',
+ 'ltrif;': '\u25c2',
+ 'ltrPar;': '\u2996',
+ 'lurdshar;': '\u294a',
+ 'luruhar;': '\u2966',
+ 'lvertneqq;': '\u2268\ufe00',
+ 'lvnE;': '\u2268\ufe00',
+ 'macr;': '\xaf',
+ 'macr': '\xaf',
+ 'male;': '\u2642',
+ 'malt;': '\u2720',
+ 'maltese;': '\u2720',
+ 'Map;': '\u2905',
+ 'map;': '\u21a6',
+ 'mapsto;': '\u21a6',
+ 'mapstodown;': '\u21a7',
+ 'mapstoleft;': '\u21a4',
+ 'mapstoup;': '\u21a5',
+ 'marker;': '\u25ae',
+ 'mcomma;': '\u2a29',
+ 'Mcy;': '\u041c',
+ 'mcy;': '\u043c',
+ 'mdash;': '\u2014',
+ 'mDDot;': '\u223a',
+ 'measuredangle;': '\u2221',
+ 'MediumSpace;': '\u205f',
+ 'Mellintrf;': '\u2133',
+ 'Mfr;': '\U0001d510',
+ 'mfr;': '\U0001d52a',
+ 'mho;': '\u2127',
+ 'micro;': '\xb5',
+ 'micro': '\xb5',
+ 'mid;': '\u2223',
+ 'midast;': '*',
+ 'midcir;': '\u2af0',
+ 'middot;': '\xb7',
+ 'middot': '\xb7',
+ 'minus;': '\u2212',
+ 'minusb;': '\u229f',
+ 'minusd;': '\u2238',
+ 'minusdu;': '\u2a2a',
+ 'MinusPlus;': '\u2213',
+ 'mlcp;': '\u2adb',
+ 'mldr;': '\u2026',
+ 'mnplus;': '\u2213',
+ 'models;': '\u22a7',
+ 'Mopf;': '\U0001d544',
+ 'mopf;': '\U0001d55e',
+ 'mp;': '\u2213',
+ 'Mscr;': '\u2133',
+ 'mscr;': '\U0001d4c2',
+ 'mstpos;': '\u223e',
+ 'Mu;': '\u039c',
+ 'mu;': '\u03bc',
+ 'multimap;': '\u22b8',
+ 'mumap;': '\u22b8',
+ 'nabla;': '\u2207',
+ 'Nacute;': '\u0143',
+ 'nacute;': '\u0144',
+ 'nang;': '\u2220\u20d2',
+ 'nap;': '\u2249',
+ 'napE;': '\u2a70\u0338',
+ 'napid;': '\u224b\u0338',
+ 'napos;': '\u0149',
+ 'napprox;': '\u2249',
+ 'natur;': '\u266e',
+ 'natural;': '\u266e',
+ 'naturals;': '\u2115',
+ 'nbsp;': '\xa0',
+ 'nbsp': '\xa0',
+ 'nbump;': '\u224e\u0338',
+ 'nbumpe;': '\u224f\u0338',
+ 'ncap;': '\u2a43',
+ 'Ncaron;': '\u0147',
+ 'ncaron;': '\u0148',
+ 'Ncedil;': '\u0145',
+ 'ncedil;': '\u0146',
+ 'ncong;': '\u2247',
+ 'ncongdot;': '\u2a6d\u0338',
+ 'ncup;': '\u2a42',
+ 'Ncy;': '\u041d',
+ 'ncy;': '\u043d',
+ 'ndash;': '\u2013',
+ 'ne;': '\u2260',
+ 'nearhk;': '\u2924',
+ 'neArr;': '\u21d7',
+ 'nearr;': '\u2197',
+ 'nearrow;': '\u2197',
+ 'nedot;': '\u2250\u0338',
+ 'NegativeMediumSpace;': '\u200b',
+ 'NegativeThickSpace;': '\u200b',
+ 'NegativeThinSpace;': '\u200b',
+ 'NegativeVeryThinSpace;': '\u200b',
+ 'nequiv;': '\u2262',
+ 'nesear;': '\u2928',
+ 'nesim;': '\u2242\u0338',
+ 'NestedGreaterGreater;': '\u226b',
+ 'NestedLessLess;': '\u226a',
+ 'NewLine;': '\u240a',
+ 'nexist;': '\u2204',
+ 'nexists;': '\u2204',
+ 'Nfr;': '\U0001d511',
+ 'nfr;': '\U0001d52b',
+ 'ngE;': '\u2267\u0338',
+ 'nge;': '\u2271',
+ 'ngeq;': '\u2271',
+ 'ngeqq;': '\u2267\u0338',
+ 'ngeqslant;': '\u2a7e\u0338',
+ 'nges;': '\u2a7e\u0338',
+ 'nGg;': '\u22d9\u0338',
+ 'ngsim;': '\u2275',
+ 'nGt;': '\u226b\u20d2',
+ 'ngt;': '\u226f',
+ 'ngtr;': '\u226f',
+ 'nGtv;': '\u226b\u0338',
+ 'nhArr;': '\u21ce',
+ 'nharr;': '\u21ae',
+ 'nhpar;': '\u2af2',
+ 'ni;': '\u220b',
+ 'nis;': '\u22fc',
+ 'nisd;': '\u22fa',
+ 'niv;': '\u220b',
+ 'NJcy;': '\u040a',
+ 'njcy;': '\u045a',
+ 'nlArr;': '\u21cd',
+ 'nlarr;': '\u219a',
+ 'nldr;': '\u2025',
+ 'nlE;': '\u2266\u0338',
+ 'nle;': '\u2270',
+ 'nLeftarrow;': '\u21cd',
+ 'nleftarrow;': '\u219a',
+ 'nLeftrightarrow;': '\u21ce',
+ 'nleftrightarrow;': '\u21ae',
+ 'nleq;': '\u2270',
+ 'nleqq;': '\u2266\u0338',
+ 'nleqslant;': '\u2a7d\u0338',
+ 'nles;': '\u2a7d\u0338',
+ 'nless;': '\u226e',
+ 'nLl;': '\u22d8\u0338',
+ 'nlsim;': '\u2274',
+ 'nLt;': '\u226a\u20d2',
+ 'nlt;': '\u226e',
+ 'nltri;': '\u22ea',
+ 'nltrie;': '\u22ec',
+ 'nLtv;': '\u226a\u0338',
+ 'nmid;': '\u2224',
+ 'NoBreak;': '\u2060',
+ 'NonBreakingSpace;': '\xa0',
+ 'Nopf;': '\u2115',
+ 'nopf;': '\U0001d55f',
+ 'Not;': '\u2aec',
+ 'not;': '\xac',
+ 'not': '\xac',
+ 'NotCongruent;': '\u2262',
+ 'NotCupCap;': '\u226d',
+ 'NotDoubleVerticalBar;': '\u2226',
+ 'NotElement;': '\u2209',
+ 'NotEqual;': '\u2260',
+ 'NotEqualTilde;': '\u2242\u0338',
+ 'NotExists;': '\u2204',
+ 'NotGreater;': '\u226f',
+ 'NotGreaterEqual;': '\u2271',
+ 'NotGreaterFullEqual;': '\u2267\u0338',
+ 'NotGreaterGreater;': '\u226b\u0338',
+ 'NotGreaterLess;': '\u2279',
+ 'NotGreaterSlantEqual;': '\u2a7e\u0338',
+ 'NotGreaterTilde;': '\u2275',
+ 'NotHumpDownHump;': '\u224e\u0338',
+ 'NotHumpEqual;': '\u224f\u0338',
+ 'notin;': '\u2209',
+ 'notindot;': '\u22f5\u0338',
+ 'notinE;': '\u22f9\u0338',
+ 'notinva;': '\u2209',
+ 'notinvb;': '\u22f7',
+ 'notinvc;': '\u22f6',
+ 'NotLeftTriangle;': '\u22ea',
+ 'NotLeftTriangleBar;': '\u29cf\u0338',
+ 'NotLeftTriangleEqual;': '\u22ec',
+ 'NotLess;': '\u226e',
+ 'NotLessEqual;': '\u2270',
+ 'NotLessGreater;': '\u2278',
+ 'NotLessLess;': '\u226a\u0338',
+ 'NotLessSlantEqual;': '\u2a7d\u0338',
+ 'NotLessTilde;': '\u2274',
+ 'NotNestedGreaterGreater;': '\u2aa2\u0338',
+ 'NotNestedLessLess;': '\u2aa1\u0338',
+ 'notni;': '\u220c',
+ 'notniva;': '\u220c',
+ 'notnivb;': '\u22fe',
+ 'notnivc;': '\u22fd',
+ 'NotPrecedes;': '\u2280',
+ 'NotPrecedesEqual;': '\u2aaf\u0338',
+ 'NotPrecedesSlantEqual;': '\u22e0',
+ 'NotReverseElement;': '\u220c',
+ 'NotRightTriangle;': '\u22eb',
+ 'NotRightTriangleBar;': '\u29d0\u0338',
+ 'NotRightTriangleEqual;': '\u22ed',
+ 'NotSquareSubset;': '\u228f\u0338',
+ 'NotSquareSubsetEqual;': '\u22e2',
+ 'NotSquareSuperset;': '\u2290\u0338',
+ 'NotSquareSupersetEqual;': '\u22e3',
+ 'NotSubset;': '\u2282\u20d2',
+ 'NotSubsetEqual;': '\u2288',
+ 'NotSucceeds;': '\u2281',
+ 'NotSucceedsEqual;': '\u2ab0\u0338',
+ 'NotSucceedsSlantEqual;': '\u22e1',
+ 'NotSucceedsTilde;': '\u227f\u0338',
+ 'NotSuperset;': '\u2283\u20d2',
+ 'NotSupersetEqual;': '\u2289',
+ 'NotTilde;': '\u2241',
+ 'NotTildeEqual;': '\u2244',
+ 'NotTildeFullEqual;': '\u2247',
+ 'NotTildeTilde;': '\u2249',
+ 'NotVerticalBar;': '\u2224',
+ 'npar;': '\u2226',
+ 'nparallel;': '\u2226',
+ 'nparsl;': '\u2afd\u20e5',
+ 'npart;': '\u2202\u0338',
+ 'npolint;': '\u2a14',
+ 'npr;': '\u2280',
+ 'nprcue;': '\u22e0',
+ 'npre;': '\u2aaf\u0338',
+ 'nprec;': '\u2280',
+ 'npreceq;': '\u2aaf\u0338',
+ 'nrArr;': '\u21cf',
+ 'nrarr;': '\u219b',
+ 'nrarrc;': '\u2933\u0338',
+ 'nrarrw;': '\u219d\u0338',
+ 'nRightarrow;': '\u21cf',
+ 'nrightarrow;': '\u219b',
+ 'nrtri;': '\u22eb',
+ 'nrtrie;': '\u22ed',
+ 'nsc;': '\u2281',
+ 'nsccue;': '\u22e1',
+ 'nsce;': '\u2ab0\u0338',
+ 'Nscr;': '\U0001d4a9',
+ 'nscr;': '\U0001d4c3',
+ 'nshortmid;': '\u2224',
+ 'nshortparallel;': '\u2226',
+ 'nsim;': '\u2241',
+ 'nsime;': '\u2244',
+ 'nsimeq;': '\u2244',
+ 'nsmid;': '\u2224',
+ 'nspar;': '\u2226',
+ 'nsqsube;': '\u22e2',
+ 'nsqsupe;': '\u22e3',
+ 'nsub;': '\u2284',
+ 'nsubE;': '\u2ac5\u0338',
+ 'nsube;': '\u2288',
+ 'nsubset;': '\u2282\u20d2',
+ 'nsubseteq;': '\u2288',
+ 'nsubseteqq;': '\u2ac5\u0338',
+ 'nsucc;': '\u2281',
+ 'nsucceq;': '\u2ab0\u0338',
+ 'nsup;': '\u2285',
+ 'nsupE;': '\u2ac6\u0338',
+ 'nsupe;': '\u2289',
+ 'nsupset;': '\u2283\u20d2',
+ 'nsupseteq;': '\u2289',
+ 'nsupseteqq;': '\u2ac6\u0338',
+ 'ntgl;': '\u2279',
+ 'Ntilde;': '\xd1',
+ 'Ntilde': '\xd1',
+ 'ntilde;': '\xf1',
+ 'ntilde': '\xf1',
+ 'ntlg;': '\u2278',
+ 'ntriangleleft;': '\u22ea',
+ 'ntrianglelefteq;': '\u22ec',
+ 'ntriangleright;': '\u22eb',
+ 'ntrianglerighteq;': '\u22ed',
+ 'Nu;': '\u039d',
+ 'nu;': '\u03bd',
+ 'num;': '#',
+ 'numero;': '\u2116',
+ 'numsp;': '\u2007',
+ 'nvap;': '\u224d\u20d2',
+ 'nVDash;': '\u22af',
+ 'nVdash;': '\u22ae',
+ 'nvDash;': '\u22ad',
+ 'nvdash;': '\u22ac',
+ 'nvge;': '\u2265\u20d2',
+ 'nvgt;': '>\u20d2',
+ 'nvHarr;': '\u2904',
+ 'nvinfin;': '\u29de',
+ 'nvlArr;': '\u2902',
+ 'nvle;': '\u2264\u20d2',
+ 'nvlt;': '<\u20d2',
+ 'nvltrie;': '\u22b4\u20d2',
+ 'nvrArr;': '\u2903',
+ 'nvrtrie;': '\u22b5\u20d2',
+ 'nvsim;': '\u223c\u20d2',
+ 'nwarhk;': '\u2923',
+ 'nwArr;': '\u21d6',
+ 'nwarr;': '\u2196',
+ 'nwarrow;': '\u2196',
+ 'nwnear;': '\u2927',
+ 'Oacute;': '\xd3',
+ 'Oacute': '\xd3',
+ 'oacute;': '\xf3',
+ 'oacute': '\xf3',
+ 'oast;': '\u229b',
+ 'ocir;': '\u229a',
+ 'Ocirc;': '\xd4',
+ 'Ocirc': '\xd4',
+ 'ocirc;': '\xf4',
+ 'ocirc': '\xf4',
+ 'Ocy;': '\u041e',
+ 'ocy;': '\u043e',
+ 'odash;': '\u229d',
+ 'Odblac;': '\u0150',
+ 'odblac;': '\u0151',
+ 'odiv;': '\u2a38',
+ 'odot;': '\u2299',
+ 'odsold;': '\u29bc',
+ 'OElig;': '\u0152',
+ 'oelig;': '\u0153',
+ 'ofcir;': '\u29bf',
+ 'Ofr;': '\U0001d512',
+ 'ofr;': '\U0001d52c',
+ 'ogon;': '\u02db',
+ 'Ograve;': '\xd2',
+ 'Ograve': '\xd2',
+ 'ograve;': '\xf2',
+ 'ograve': '\xf2',
+ 'ogt;': '\u29c1',
+ 'ohbar;': '\u29b5',
+ 'ohm;': '\u03a9',
+ 'oint;': '\u222e',
+ 'olarr;': '\u21ba',
+ 'olcir;': '\u29be',
+ 'olcross;': '\u29bb',
+ 'oline;': '\u203e',
+ 'olt;': '\u29c0',
+ 'Omacr;': '\u014c',
+ 'omacr;': '\u014d',
+ 'Omega;': '\u03a9',
+ 'omega;': '\u03c9',
+ 'Omicron;': '\u039f',
+ 'omicron;': '\u03bf',
+ 'omid;': '\u29b6',
+ 'ominus;': '\u2296',
+ 'Oopf;': '\U0001d546',
+ 'oopf;': '\U0001d560',
+ 'opar;': '\u29b7',
+ 'OpenCurlyDoubleQuote;': '\u201c',
+ 'OpenCurlyQuote;': '\u2018',
+ 'operp;': '\u29b9',
+ 'oplus;': '\u2295',
+ 'Or;': '\u2a54',
+ 'or;': '\u2228',
+ 'orarr;': '\u21bb',
+ 'ord;': '\u2a5d',
+ 'order;': '\u2134',
+ 'orderof;': '\u2134',
+ 'ordf;': '\xaa',
+ 'ordf': '\xaa',
+ 'ordm;': '\xba',
+ 'ordm': '\xba',
+ 'origof;': '\u22b6',
+ 'oror;': '\u2a56',
+ 'orslope;': '\u2a57',
+ 'orv;': '\u2a5b',
+ 'oS;': '\u24c8',
+ 'Oscr;': '\U0001d4aa',
+ 'oscr;': '\u2134',
+ 'Oslash;': '\xd8',
+ 'Oslash': '\xd8',
+ 'oslash;': '\xf8',
+ 'oslash': '\xf8',
+ 'osol;': '\u2298',
+ 'Otilde;': '\xd5',
+ 'Otilde': '\xd5',
+ 'otilde;': '\xf5',
+ 'otilde': '\xf5',
+ 'Otimes;': '\u2a37',
+ 'otimes;': '\u2297',
+ 'otimesas;': '\u2a36',
+ 'Ouml;': '\xd6',
+ 'Ouml': '\xd6',
+ 'ouml;': '\xf6',
+ 'ouml': '\xf6',
+ 'ovbar;': '\u233d',
+ 'OverBar;': '\u203e',
+ 'OverBrace;': '\u23de',
+ 'OverBracket;': '\u23b4',
+ 'OverParenthesis;': '\u23dc',
+ 'par;': '\u2225',
+ 'para;': '\xb6',
+ 'para': '\xb6',
+ 'parallel;': '\u2225',
+ 'parsim;': '\u2af3',
+ 'parsl;': '\u2afd',
+ 'part;': '\u2202',
+ 'PartialD;': '\u2202',
+ 'Pcy;': '\u041f',
+ 'pcy;': '\u043f',
+ 'percnt;': '%',
+ 'period;': '.',
+ 'permil;': '\u2030',
+ 'perp;': '\u22a5',
+ 'pertenk;': '\u2031',
+ 'Pfr;': '\U0001d513',
+ 'pfr;': '\U0001d52d',
+ 'Phi;': '\u03a6',
+ 'phi;': '\u03c6',
+ 'phiv;': '\u03d5',
+ 'phmmat;': '\u2133',
+ 'phone;': '\u260e',
+ 'Pi;': '\u03a0',
+ 'pi;': '\u03c0',
+ 'pitchfork;': '\u22d4',
+ 'piv;': '\u03d6',
+ 'planck;': '\u210f',
+ 'planckh;': '\u210e',
+ 'plankv;': '\u210f',
+ 'plus;': '+',
+ 'plusacir;': '\u2a23',
+ 'plusb;': '\u229e',
+ 'pluscir;': '\u2a22',
+ 'plusdo;': '\u2214',
+ 'plusdu;': '\u2a25',
+ 'pluse;': '\u2a72',
+ 'PlusMinus;': '\xb1',
+ 'plusmn;': '\xb1',
+ 'plusmn': '\xb1',
+ 'plussim;': '\u2a26',
+ 'plustwo;': '\u2a27',
+ 'pm;': '\xb1',
+ 'Poincareplane;': '\u210c',
+ 'pointint;': '\u2a15',
+ 'Popf;': '\u2119',
+ 'popf;': '\U0001d561',
+ 'pound;': '\xa3',
+ 'pound': '\xa3',
+ 'Pr;': '\u2abb',
+ 'pr;': '\u227a',
+ 'prap;': '\u2ab7',
+ 'prcue;': '\u227c',
+ 'prE;': '\u2ab3',
+ 'pre;': '\u2aaf',
+ 'prec;': '\u227a',
+ 'precapprox;': '\u2ab7',
+ 'preccurlyeq;': '\u227c',
+ 'Precedes;': '\u227a',
+ 'PrecedesEqual;': '\u2aaf',
+ 'PrecedesSlantEqual;': '\u227c',
+ 'PrecedesTilde;': '\u227e',
+ 'preceq;': '\u2aaf',
+ 'precnapprox;': '\u2ab9',
+ 'precneqq;': '\u2ab5',
+ 'precnsim;': '\u22e8',
+ 'precsim;': '\u227e',
+ 'Prime;': '\u2033',
+ 'prime;': '\u2032',
+ 'primes;': '\u2119',
+ 'prnap;': '\u2ab9',
+ 'prnE;': '\u2ab5',
+ 'prnsim;': '\u22e8',
+ 'prod;': '\u220f',
+ 'Product;': '\u220f',
+ 'profalar;': '\u232e',
+ 'profline;': '\u2312',
+ 'profsurf;': '\u2313',
+ 'prop;': '\u221d',
+ 'Proportion;': '\u2237',
+ 'Proportional;': '\u221d',
+ 'propto;': '\u221d',
+ 'prsim;': '\u227e',
+ 'prurel;': '\u22b0',
+ 'Pscr;': '\U0001d4ab',
+ 'pscr;': '\U0001d4c5',
+ 'Psi;': '\u03a8',
+ 'psi;': '\u03c8',
+ 'puncsp;': '\u2008',
+ 'Qfr;': '\U0001d514',
+ 'qfr;': '\U0001d52e',
+ 'qint;': '\u2a0c',
+ 'Qopf;': '\u211a',
+ 'qopf;': '\U0001d562',
+ 'qprime;': '\u2057',
+ 'Qscr;': '\U0001d4ac',
+ 'qscr;': '\U0001d4c6',
+ 'quaternions;': '\u210d',
+ 'quatint;': '\u2a16',
+ 'quest;': '?',
+ 'questeq;': '\u225f',
+ 'QUOT;': '"',
+ 'QUOT': '"',
+ 'quot;': '"',
+ 'quot': '"',
+ 'rAarr;': '\u21db',
+ 'race;': '\u223d\u0331',
+ 'Racute;': '\u0154',
+ 'racute;': '\u0155',
+ 'radic;': '\u221a',
+ 'raemptyv;': '\u29b3',
+ 'Rang;': '\u27eb',
+ 'rang;': '\u232a',
+ 'rangd;': '\u2992',
+ 'range;': '\u29a5',
+ 'rangle;': '\u232a',
+ 'raquo;': '\xbb',
+ 'raquo': '\xbb',
+ 'Rarr;': '\u21a0',
+ 'rArr;': '\u21d2',
+ 'rarr;': '\u2192',
+ 'rarrap;': '\u2975',
+ 'rarrb;': '\u21e5',
+ 'rarrbfs;': '\u2920',
+ 'rarrc;': '\u2933',
+ 'rarrfs;': '\u291e',
+ 'rarrhk;': '\u21aa',
+ 'rarrlp;': '\u21ac',
+ 'rarrpl;': '\u2945',
+ 'rarrsim;': '\u2974',
+ 'Rarrtl;': '\u2916',
+ 'rarrtl;': '\u21a3',
+ 'rarrw;': '\u219d',
+ 'rAtail;': '\u291c',
+ 'ratail;': '\u291a',
+ 'ratio;': '\u2236',
+ 'rationals;': '\u211a',
+ 'RBarr;': '\u2910',
+ 'rBarr;': '\u290f',
+ 'rbarr;': '\u290d',
+ 'rbbrk;': '\u2773',
+ 'rbrace;': '}',
+ 'rbrack;': ']',
+ 'rbrke;': '\u298c',
+ 'rbrksld;': '\u298e',
+ 'rbrkslu;': '\u2990',
+ 'Rcaron;': '\u0158',
+ 'rcaron;': '\u0159',
+ 'Rcedil;': '\u0156',
+ 'rcedil;': '\u0157',
+ 'rceil;': '\u2309',
+ 'rcub;': '}',
+ 'Rcy;': '\u0420',
+ 'rcy;': '\u0440',
+ 'rdca;': '\u2937',
+ 'rdldhar;': '\u2969',
+ 'rdquo;': '\u201d',
+ 'rdquor;': '\u201d',
+ 'rdsh;': '\u21b3',
+ 'Re;': '\u211c',
+ 'real;': '\u211c',
+ 'realine;': '\u211b',
+ 'realpart;': '\u211c',
+ 'reals;': '\u211d',
+ 'rect;': '\u25ad',
+ 'REG;': '\xae',
+ 'REG': '\xae',
+ 'reg;': '\xae',
+ 'reg': '\xae',
+ 'ReverseElement;': '\u220b',
+ 'ReverseEquilibrium;': '\u21cb',
+ 'ReverseUpEquilibrium;': '\u296f',
+ 'rfisht;': '\u297d',
+ 'rfloor;': '\u230b',
+ 'Rfr;': '\u211c',
+ 'rfr;': '\U0001d52f',
+ 'rHar;': '\u2964',
+ 'rhard;': '\u21c1',
+ 'rharu;': '\u21c0',
+ 'rharul;': '\u296c',
+ 'Rho;': '\u03a1',
+ 'rho;': '\u03c1',
+ 'rhov;': '\u03f1',
+ 'RightAngleBracket;': '\u232a',
+ 'RightArrow;': '\u2192',
+ 'Rightarrow;': '\u21d2',
+ 'rightarrow;': '\u2192',
+ 'RightArrowBar;': '\u21e5',
+ 'RightArrowLeftArrow;': '\u21c4',
+ 'rightarrowtail;': '\u21a3',
+ 'RightCeiling;': '\u2309',
+ 'RightDoubleBracket;': '\u27e7',
+ 'RightDownTeeVector;': '\u295d',
+ 'RightDownVector;': '\u21c2',
+ 'RightDownVectorBar;': '\u2955',
+ 'RightFloor;': '\u230b',
+ 'rightharpoondown;': '\u21c1',
+ 'rightharpoonup;': '\u21c0',
+ 'rightleftarrows;': '\u21c4',
+ 'rightleftharpoons;': '\u21cc',
+ 'rightrightarrows;': '\u21c9',
+ 'rightsquigarrow;': '\u219d',
+ 'RightTee;': '\u22a2',
+ 'RightTeeArrow;': '\u21a6',
+ 'RightTeeVector;': '\u295b',
+ 'rightthreetimes;': '\u22cc',
+ 'RightTriangle;': '\u22b3',
+ 'RightTriangleBar;': '\u29d0',
+ 'RightTriangleEqual;': '\u22b5',
+ 'RightUpDownVector;': '\u294f',
+ 'RightUpTeeVector;': '\u295c',
+ 'RightUpVector;': '\u21be',
+ 'RightUpVectorBar;': '\u2954',
+ 'RightVector;': '\u21c0',
+ 'RightVectorBar;': '\u2953',
+ 'ring;': '\u02da',
+ 'risingdotseq;': '\u2253',
+ 'rlarr;': '\u21c4',
+ 'rlhar;': '\u21cc',
+ 'rlm;': '\u200f',
+ 'rmoust;': '\u23b1',
+ 'rmoustache;': '\u23b1',
+ 'rnmid;': '\u2aee',
+ 'roang;': '\u27ed',
+ 'roarr;': '\u21fe',
+ 'robrk;': '\u27e7',
+ 'ropar;': '\u2986',
+ 'Ropf;': '\u211d',
+ 'ropf;': '\U0001d563',
+ 'roplus;': '\u2a2e',
+ 'rotimes;': '\u2a35',
+ 'RoundImplies;': '\u2970',
+ 'rpar;': ')',
+ 'rpargt;': '\u2994',
+ 'rppolint;': '\u2a12',
+ 'rrarr;': '\u21c9',
+ 'Rrightarrow;': '\u21db',
+ 'rsaquo;': '\u203a',
+ 'Rscr;': '\u211b',
+ 'rscr;': '\U0001d4c7',
+ 'Rsh;': '\u21b1',
+ 'rsh;': '\u21b1',
+ 'rsqb;': ']',
+ 'rsquo;': '\u2019',
+ 'rsquor;': '\u2019',
+ 'rthree;': '\u22cc',
+ 'rtimes;': '\u22ca',
+ 'rtri;': '\u25b9',
+ 'rtrie;': '\u22b5',
+ 'rtrif;': '\u25b8',
+ 'rtriltri;': '\u29ce',
+ 'RuleDelayed;': '\u29f4',
+ 'ruluhar;': '\u2968',
+ 'rx;': '\u211e',
+ 'Sacute;': '\u015a',
+ 'sacute;': '\u015b',
+ 'sbquo;': '\u201a',
+ 'Sc;': '\u2abc',
+ 'sc;': '\u227b',
+ 'scap;': '\u2ab8',
+ 'Scaron;': '\u0160',
+ 'scaron;': '\u0161',
+ 'sccue;': '\u227d',
+ 'scE;': '\u2ab4',
+ 'sce;': '\u2ab0',
+ 'Scedil;': '\u015e',
+ 'scedil;': '\u015f',
+ 'Scirc;': '\u015c',
+ 'scirc;': '\u015d',
+ 'scnap;': '\u2aba',
+ 'scnE;': '\u2ab6',
+ 'scnsim;': '\u22e9',
+ 'scpolint;': '\u2a13',
+ 'scsim;': '\u227f',
+ 'Scy;': '\u0421',
+ 'scy;': '\u0441',
+ 'sdot;': '\u22c5',
+ 'sdotb;': '\u22a1',
+ 'sdote;': '\u2a66',
+ 'searhk;': '\u2925',
+ 'seArr;': '\u21d8',
+ 'searr;': '\u2198',
+ 'searrow;': '\u2198',
+ 'sect;': '\xa7',
+ 'sect': '\xa7',
+ 'semi;': ';',
+ 'seswar;': '\u2929',
+ 'setminus;': '\u2216',
+ 'setmn;': '\u2216',
+ 'sext;': '\u2736',
+ 'Sfr;': '\U0001d516',
+ 'sfr;': '\U0001d530',
+ 'sfrown;': '\u2322',
+ 'sharp;': '\u266f',
+ 'SHCHcy;': '\u0429',
+ 'shchcy;': '\u0449',
+ 'SHcy;': '\u0428',
+ 'shcy;': '\u0448',
+ 'ShortDownArrow;': '\u2193',
+ 'ShortLeftArrow;': '\u2190',
+ 'shortmid;': '\u2223',
+ 'shortparallel;': '\u2225',
+ 'ShortRightArrow;': '\u2192',
+ 'ShortUpArrow;': '\u2191',
+ 'shy;': '\xad',
+ 'shy': '\xad',
+ 'Sigma;': '\u03a3',
+ 'sigma;': '\u03c3',
+ 'sigmaf;': '\u03c2',
+ 'sigmav;': '\u03c2',
+ 'sim;': '\u223c',
+ 'simdot;': '\u2a6a',
+ 'sime;': '\u2243',
+ 'simeq;': '\u2243',
+ 'simg;': '\u2a9e',
+ 'simgE;': '\u2aa0',
+ 'siml;': '\u2a9d',
+ 'simlE;': '\u2a9f',
+ 'simne;': '\u2246',
+ 'simplus;': '\u2a24',
+ 'simrarr;': '\u2972',
+ 'slarr;': '\u2190',
+ 'SmallCircle;': '\u2218',
+ 'smallsetminus;': '\u2216',
+ 'smashp;': '\u2a33',
+ 'smeparsl;': '\u29e4',
+ 'smid;': '\u2223',
+ 'smile;': '\u2323',
+ 'smt;': '\u2aaa',
+ 'smte;': '\u2aac',
+ 'smtes;': '\u2aac\ufe00',
+ 'SOFTcy;': '\u042c',
+ 'softcy;': '\u044c',
+ 'sol;': '/',
+ 'solb;': '\u29c4',
+ 'solbar;': '\u233f',
+ 'Sopf;': '\U0001d54a',
+ 'sopf;': '\U0001d564',
+ 'spades;': '\u2660',
+ 'spadesuit;': '\u2660',
+ 'spar;': '\u2225',
+ 'sqcap;': '\u2293',
+ 'sqcaps;': '\u2293\ufe00',
+ 'sqcup;': '\u2294',
+ 'sqcups;': '\u2294\ufe00',
+ 'Sqrt;': '\u221a',
+ 'sqsub;': '\u228f',
+ 'sqsube;': '\u2291',
+ 'sqsubset;': '\u228f',
+ 'sqsubseteq;': '\u2291',
+ 'sqsup;': '\u2290',
+ 'sqsupe;': '\u2292',
+ 'sqsupset;': '\u2290',
+ 'sqsupseteq;': '\u2292',
+ 'squ;': '\u25a1',
+ 'Square;': '\u25a1',
+ 'square;': '\u25a1',
+ 'SquareIntersection;': '\u2293',
+ 'SquareSubset;': '\u228f',
+ 'SquareSubsetEqual;': '\u2291',
+ 'SquareSuperset;': '\u2290',
+ 'SquareSupersetEqual;': '\u2292',
+ 'SquareUnion;': '\u2294',
+ 'squarf;': '\u25aa',
+ 'squf;': '\u25aa',
+ 'srarr;': '\u2192',
+ 'Sscr;': '\U0001d4ae',
+ 'sscr;': '\U0001d4c8',
+ 'ssetmn;': '\u2216',
+ 'ssmile;': '\u2323',
+ 'sstarf;': '\u22c6',
+ 'Star;': '\u22c6',
+ 'star;': '\u2606',
+ 'starf;': '\u2605',
+ 'straightepsilon;': '\u03f5',
+ 'straightphi;': '\u03d5',
+ 'strns;': '\xaf',
+ 'Sub;': '\u22d0',
+ 'sub;': '\u2282',
+ 'subdot;': '\u2abd',
+ 'subE;': '\u2ac5',
+ 'sube;': '\u2286',
+ 'subedot;': '\u2ac3',
+ 'submult;': '\u2ac1',
+ 'subnE;': '\u2acb',
+ 'subne;': '\u228a',
+ 'subplus;': '\u2abf',
+ 'subrarr;': '\u2979',
+ 'Subset;': '\u22d0',
+ 'subset;': '\u2282',
+ 'subseteq;': '\u2286',
+ 'subseteqq;': '\u2ac5',
+ 'SubsetEqual;': '\u2286',
+ 'subsetneq;': '\u228a',
+ 'subsetneqq;': '\u2acb',
+ 'subsim;': '\u2ac7',
+ 'subsub;': '\u2ad5',
+ 'subsup;': '\u2ad3',
+ 'succ;': '\u227b',
+ 'succapprox;': '\u2ab8',
+ 'succcurlyeq;': '\u227d',
+ 'Succeeds;': '\u227b',
+ 'SucceedsEqual;': '\u2ab0',
+ 'SucceedsSlantEqual;': '\u227d',
+ 'SucceedsTilde;': '\u227f',
+ 'succeq;': '\u2ab0',
+ 'succnapprox;': '\u2aba',
+ 'succneqq;': '\u2ab6',
+ 'succnsim;': '\u22e9',
+ 'succsim;': '\u227f',
+ 'SuchThat;': '\u220b',
+ 'Sum;': '\u2211',
+ 'sum;': '\u2211',
+ 'sung;': '\u266a',
+ 'Sup;': '\u22d1',
+ 'sup;': '\u2283',
+ 'sup1;': '\xb9',
+ 'sup1': '\xb9',
+ 'sup2;': '\xb2',
+ 'sup2': '\xb2',
+ 'sup3;': '\xb3',
+ 'sup3': '\xb3',
+ 'supdot;': '\u2abe',
+ 'supdsub;': '\u2ad8',
+ 'supE;': '\u2ac6',
+ 'supe;': '\u2287',
+ 'supedot;': '\u2ac4',
+ 'Superset;': '\u2283',
+ 'SupersetEqual;': '\u2287',
+ 'suphsol;': '\u27c9',
+ 'suphsub;': '\u2ad7',
+ 'suplarr;': '\u297b',
+ 'supmult;': '\u2ac2',
+ 'supnE;': '\u2acc',
+ 'supne;': '\u228b',
+ 'supplus;': '\u2ac0',
+ 'Supset;': '\u22d1',
+ 'supset;': '\u2283',
+ 'supseteq;': '\u2287',
+ 'supseteqq;': '\u2ac6',
+ 'supsetneq;': '\u228b',
+ 'supsetneqq;': '\u2acc',
+ 'supsim;': '\u2ac8',
+ 'supsub;': '\u2ad4',
+ 'supsup;': '\u2ad6',
+ 'swarhk;': '\u2926',
+ 'swArr;': '\u21d9',
+ 'swarr;': '\u2199',
+ 'swarrow;': '\u2199',
+ 'swnwar;': '\u292a',
+ 'szlig;': '\xdf',
+ 'szlig': '\xdf',
+ 'Tab;': '\u2409',
+ 'target;': '\u2316',
+ 'Tau;': '\u03a4',
+ 'tau;': '\u03c4',
+ 'tbrk;': '\u23b4',
+ 'Tcaron;': '\u0164',
+ 'tcaron;': '\u0165',
+ 'Tcedil;': '\u0162',
+ 'tcedil;': '\u0163',
+ 'Tcy;': '\u0422',
+ 'tcy;': '\u0442',
+ 'tdot;': '\u25cc\u20db',
+ 'telrec;': '\u2315',
+ 'Tfr;': '\U0001d517',
+ 'tfr;': '\U0001d531',
+ 'there4;': '\u2234',
+ 'Therefore;': '\u2234',
+ 'therefore;': '\u2234',
+ 'Theta;': '\u0398',
+ 'theta;': '\u03b8',
+ 'thetasym;': '\u03d1',
+ 'thetav;': '\u03d1',
+ 'thickapprox;': '\u2248',
+ 'thicksim;': '\u223c',
+ 'ThickSpace;': '\u205f\u200a',
+ 'thinsp;': '\u2009',
+ 'ThinSpace;': '\u2009',
+ 'thkap;': '\u2248',
+ 'thksim;': '\u223c',
+ 'THORN;': '\xde',
+ 'THORN': '\xde',
+ 'thorn;': '\xfe',
+ 'thorn': '\xfe',
+ 'Tilde;': '\u223c',
+ 'tilde;': '\u02dc',
+ 'TildeEqual;': '\u2243',
+ 'TildeFullEqual;': '\u2245',
+ 'TildeTilde;': '\u2248',
+ 'times;': '\xd7',
+ 'times': '\xd7',
+ 'timesb;': '\u22a0',
+ 'timesbar;': '\u2a31',
+ 'timesd;': '\u2a30',
+ 'tint;': '\u222d',
+ 'toea;': '\u2928',
+ 'top;': '\u22a4',
+ 'topbot;': '\u2336',
+ 'topcir;': '\u2af1',
+ 'Topf;': '\U0001d54b',
+ 'topf;': '\U0001d565',
+ 'topfork;': '\u2ada',
+ 'tosa;': '\u2929',
+ 'tprime;': '\u2034',
+ 'TRADE;': '\u2122',
+ 'trade;': '\u2122',
+ 'triangle;': '\u25b5',
+ 'triangledown;': '\u25bf',
+ 'triangleleft;': '\u25c3',
+ 'trianglelefteq;': '\u22b4',
+ 'triangleq;': '\u225c',
+ 'triangleright;': '\u25b9',
+ 'trianglerighteq;': '\u22b5',
+ 'tridot;': '\u25ec',
+ 'trie;': '\u225c',
+ 'triminus;': '\u2a3a',
+ 'TripleDot;': '\u25cc\u20db',
+ 'triplus;': '\u2a39',
+ 'trisb;': '\u29cd',
+ 'tritime;': '\u2a3b',
+ 'trpezium;': '\u23e2',
+ 'Tscr;': '\U0001d4af',
+ 'tscr;': '\U0001d4c9',
+ 'TScy;': '\u0426',
+ 'tscy;': '\u0446',
+ 'TSHcy;': '\u040b',
+ 'tshcy;': '\u045b',
+ 'Tstrok;': '\u0166',
+ 'tstrok;': '\u0167',
+ 'twixt;': '\u226c',
+ 'twoheadleftarrow;': '\u219e',
+ 'twoheadrightarrow;': '\u21a0',
+ 'Uacute;': '\xda',
+ 'Uacute': '\xda',
+ 'uacute;': '\xfa',
+ 'uacute': '\xfa',
+ 'Uarr;': '\u219f',
+ 'uArr;': '\u21d1',
+ 'uarr;': '\u2191',
+ 'Uarrocir;': '\u2949',
+ 'Ubrcy;': '\u040e',
+ 'ubrcy;': '\u045e',
+ 'Ubreve;': '\u016c',
+ 'ubreve;': '\u016d',
+ 'Ucirc;': '\xdb',
+ 'Ucirc': '\xdb',
+ 'ucirc;': '\xfb',
+ 'ucirc': '\xfb',
+ 'Ucy;': '\u0423',
+ 'ucy;': '\u0443',
+ 'udarr;': '\u21c5',
+ 'Udblac;': '\u0170',
+ 'udblac;': '\u0171',
+ 'udhar;': '\u296e',
+ 'ufisht;': '\u297e',
+ 'Ufr;': '\U0001d518',
+ 'ufr;': '\U0001d532',
+ 'Ugrave;': '\xd9',
+ 'Ugrave': '\xd9',
+ 'ugrave;': '\xf9',
+ 'ugrave': '\xf9',
+ 'uHar;': '\u2963',
+ 'uharl;': '\u21bf',
+ 'uharr;': '\u21be',
+ 'uhblk;': '\u2580',
+ 'ulcorn;': '\u231c',
+ 'ulcorner;': '\u231c',
+ 'ulcrop;': '\u230f',
+ 'ultri;': '\u25f8',
+ 'Umacr;': '\u016a',
+ 'umacr;': '\u016b',
+ 'uml;': '\xa8',
+ 'uml': '\xa8',
+ 'UnderBar;': '_',
+ 'UnderBrace;': '\u23df',
+ 'UnderBracket;': '\u23b5',
+ 'UnderParenthesis;': '\u23dd',
+ 'Union;': '\u22c3',
+ 'UnionPlus;': '\u228e',
+ 'Uogon;': '\u0172',
+ 'uogon;': '\u0173',
+ 'Uopf;': '\U0001d54c',
+ 'uopf;': '\U0001d566',
+ 'UpArrow;': '\u2191',
+ 'Uparrow;': '\u21d1',
+ 'uparrow;': '\u2191',
+ 'UpArrowBar;': '\u2912',
+ 'UpArrowDownArrow;': '\u21c5',
+ 'UpDownArrow;': '\u2195',
+ 'Updownarrow;': '\u21d5',
+ 'updownarrow;': '\u2195',
+ 'UpEquilibrium;': '\u296e',
+ 'upharpoonleft;': '\u21bf',
+ 'upharpoonright;': '\u21be',
+ 'uplus;': '\u228e',
+ 'UpperLeftArrow;': '\u2196',
+ 'UpperRightArrow;': '\u2197',
+ 'Upsi;': '\u03d2',
+ 'upsi;': '\u03c5',
+ 'upsih;': '\u03d2',
+ 'Upsilon;': '\u03a5',
+ 'upsilon;': '\u03c5',
+ 'UpTee;': '\u22a5',
+ 'UpTeeArrow;': '\u21a5',
+ 'upuparrows;': '\u21c8',
+ 'urcorn;': '\u231d',
+ 'urcorner;': '\u231d',
+ 'urcrop;': '\u230e',
+ 'Uring;': '\u016e',
+ 'uring;': '\u016f',
+ 'urtri;': '\u25f9',
+ 'Uscr;': '\U0001d4b0',
+ 'uscr;': '\U0001d4ca',
+ 'utdot;': '\u22f0',
+ 'Utilde;': '\u0168',
+ 'utilde;': '\u0169',
+ 'utri;': '\u25b5',
+ 'utrif;': '\u25b4',
+ 'uuarr;': '\u21c8',
+ 'Uuml;': '\xdc',
+ 'Uuml': '\xdc',
+ 'uuml;': '\xfc',
+ 'uuml': '\xfc',
+ 'uwangle;': '\u29a7',
+ 'vangrt;': '\u299c',
+ 'varepsilon;': '\u03f5',
+ 'varkappa;': '\u03f0',
+ 'varnothing;': '\u2205',
+ 'varphi;': '\u03d5',
+ 'varpi;': '\u03d6',
+ 'varpropto;': '\u221d',
+ 'vArr;': '\u21d5',
+ 'varr;': '\u2195',
+ 'varrho;': '\u03f1',
+ 'varsigma;': '\u03c2',
+ 'varsubsetneq;': '\u228a\ufe00',
+ 'varsubsetneqq;': '\u2acb\ufe00',
+ 'varsupsetneq;': '\u228b\ufe00',
+ 'varsupsetneqq;': '\u2acc\ufe00',
+ 'vartheta;': '\u03d1',
+ 'vartriangleleft;': '\u22b2',
+ 'vartriangleright;': '\u22b3',
+ 'Vbar;': '\u2aeb',
+ 'vBar;': '\u2ae8',
+ 'vBarv;': '\u2ae9',
+ 'Vcy;': '\u0412',
+ 'vcy;': '\u0432',
+ 'VDash;': '\u22ab',
+ 'Vdash;': '\u22a9',
+ 'vDash;': '\u22a8',
+ 'vdash;': '\u22a2',
+ 'Vdashl;': '\u2ae6',
+ 'Vee;': '\u22c1',
+ 'vee;': '\u2228',
+ 'veebar;': '\u22bb',
+ 'veeeq;': '\u225a',
+ 'vellip;': '\u22ee',
+ 'Verbar;': '\u2016',
+ 'verbar;': '|',
+ 'Vert;': '\u2016',
+ 'vert;': '|',
+ 'VerticalBar;': '\u2223',
+ 'VerticalLine;': '|',
+ 'VerticalSeparator;': '\u2758',
+ 'VerticalTilde;': '\u2240',
+ 'VeryThinSpace;': '\u200a',
+ 'Vfr;': '\U0001d519',
+ 'vfr;': '\U0001d533',
+ 'vltri;': '\u22b2',
+ 'vnsub;': '\u2282\u20d2',
+ 'vnsup;': '\u2283\u20d2',
+ 'Vopf;': '\U0001d54d',
+ 'vopf;': '\U0001d567',
+ 'vprop;': '\u221d',
+ 'vrtri;': '\u22b3',
+ 'Vscr;': '\U0001d4b1',
+ 'vscr;': '\U0001d4cb',
+ 'vsubnE;': '\u2acb\ufe00',
+ 'vsubne;': '\u228a\ufe00',
+ 'vsupnE;': '\u2acc\ufe00',
+ 'vsupne;': '\u228b\ufe00',
+ 'Vvdash;': '\u22aa',
+ 'vzigzag;': '\u299a',
+ 'Wcirc;': '\u0174',
+ 'wcirc;': '\u0175',
+ 'wedbar;': '\u2a5f',
+ 'Wedge;': '\u22c0',
+ 'wedge;': '\u2227',
+ 'wedgeq;': '\u2259',
+ 'weierp;': '\u2118',
+ 'Wfr;': '\U0001d51a',
+ 'wfr;': '\U0001d534',
+ 'Wopf;': '\U0001d54e',
+ 'wopf;': '\U0001d568',
+ 'wp;': '\u2118',
+ 'wr;': '\u2240',
+ 'wreath;': '\u2240',
+ 'Wscr;': '\U0001d4b2',
+ 'wscr;': '\U0001d4cc',
+ 'xcap;': '\u22c2',
+ 'xcirc;': '\u25ef',
+ 'xcup;': '\u22c3',
+ 'xdtri;': '\u25bd',
+ 'Xfr;': '\U0001d51b',
+ 'xfr;': '\U0001d535',
+ 'xhArr;': '\u27fa',
+ 'xharr;': '\u27f7',
+ 'Xi;': '\u039e',
+ 'xi;': '\u03be',
+ 'xlArr;': '\u27f8',
+ 'xlarr;': '\u27f5',
+ 'xmap;': '\u27fc',
+ 'xnis;': '\u22fb',
+ 'xodot;': '\u2a00',
+ 'Xopf;': '\U0001d54f',
+ 'xopf;': '\U0001d569',
+ 'xoplus;': '\u2a01',
+ 'xotime;': '\u2a02',
+ 'xrArr;': '\u27f9',
+ 'xrarr;': '\u27f6',
+ 'Xscr;': '\U0001d4b3',
+ 'xscr;': '\U0001d4cd',
+ 'xsqcup;': '\u2a06',
+ 'xuplus;': '\u2a04',
+ 'xutri;': '\u25b3',
+ 'xvee;': '\u22c1',
+ 'xwedge;': '\u22c0',
+ 'Yacute;': '\xdd',
+ 'Yacute': '\xdd',
+ 'yacute;': '\xfd',
+ 'yacute': '\xfd',
+ 'YAcy;': '\u042f',
+ 'yacy;': '\u044f',
+ 'Ycirc;': '\u0176',
+ 'ycirc;': '\u0177',
+ 'Ycy;': '\u042b',
+ 'ycy;': '\u044b',
+ 'yen;': '\xa5',
+ 'yen': '\xa5',
+ 'Yfr;': '\U0001d51c',
+ 'yfr;': '\U0001d536',
+ 'YIcy;': '\u0407',
+ 'yicy;': '\u0457',
+ 'Yopf;': '\U0001d550',
+ 'yopf;': '\U0001d56a',
+ 'Yscr;': '\U0001d4b4',
+ 'yscr;': '\U0001d4ce',
+ 'YUcy;': '\u042e',
+ 'yucy;': '\u044e',
+ 'Yuml;': '\u0178',
+ 'yuml;': '\xff',
+ 'yuml': '\xff',
+ 'Zacute;': '\u0179',
+ 'zacute;': '\u017a',
+ 'Zcaron;': '\u017d',
+ 'zcaron;': '\u017e',
+ 'Zcy;': '\u0417',
+ 'zcy;': '\u0437',
+ 'Zdot;': '\u017b',
+ 'zdot;': '\u017c',
+ 'zeetrf;': '\u2128',
+ 'ZeroWidthSpace;': '\u200b',
+ 'Zeta;': '\u0396',
+ 'zeta;': '\u03b6',
+ 'Zfr;': '\u2128',
+ 'zfr;': '\U0001d537',
+ 'ZHcy;': '\u0416',
+ 'zhcy;': '\u0436',
+ 'zigrarr;': '\u21dd',
+ 'Zopf;': '\u2124',
+ 'zopf;': '\U0001d56b',
+ 'Zscr;': '\U0001d4b5',
+ 'zscr;': '\U0001d4cf',
+ 'zwj;': '\u200d',
+ 'zwnj;': '\u200c',
+}
+
# maps the Unicode codepoint to the HTML entity name
codepoint2name = {}
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index de504ab..f8ac828 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -10,6 +10,7 @@
import _markupbase
import re
+import warnings
# Regular expressions used for parsing
@@ -113,14 +114,16 @@ class HTMLParser(_markupbase.ParserBase):
CDATA_CONTENT_ELEMENTS = ("script", "style")
- def __init__(self, strict=True):
+ def __init__(self, strict=False):
"""Initialize and reset this instance.
- If strict is set to True (the default), errors are raised when invalid
- HTML is encountered. If set to False, an attempt is instead made to
- continue parsing, making "best guesses" about the intended meaning, in
- a fashion similar to what browsers typically do.
+ If strict is set to False (the default) the parser will parse invalid
+ markup, otherwise it will raise an error. Note that the strict mode
+ is deprecated.
"""
+ if strict:
+ warnings.warn("The strict mode is deprecated.",
+ DeprecationWarning, stacklevel=2)
self.strict = strict
self.reset()
@@ -271,8 +274,8 @@ class HTMLParser(_markupbase.ParserBase):
# See also parse_declaration in _markupbase
def parse_html_declaration(self, i):
rawdata = self.rawdata
- if rawdata[i:i+2] != '<!':
- self.error('unexpected call to parse_html_declaration()')
+ assert rawdata[i:i+2] == '<!', ('unexpected call to '
+ 'parse_html_declaration()')
if rawdata[i:i+4] == '<!--':
# this case is actually already handled in goahead()
return self.parse_comment(i)
@@ -292,8 +295,8 @@ class HTMLParser(_markupbase.ParserBase):
# see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
def parse_bogus_comment(self, i, report=1):
rawdata = self.rawdata
- if rawdata[i:i+2] not in ('<!', '</'):
- self.error('unexpected call to parse_comment()')
+ assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
+ 'parse_comment()')
pos = rawdata.find('>', i+2)
if pos == -1:
return -1
@@ -497,7 +500,6 @@ class HTMLParser(_markupbase.ParserBase):
self.error("unknown declaration: %r" % (data,))
# Internal -- helper to remove special character quoting
- entitydefs = None
def unescape(self, s):
if '&' not in s:
return s
@@ -507,24 +509,23 @@ class HTMLParser(_markupbase.ParserBase):
if s[0] == "#":
s = s[1:]
if s[0] in ['x','X']:
- c = int(s[1:], 16)
+ c = int(s[1:].rstrip(';'), 16)
else:
- c = int(s)
+ c = int(s.rstrip(';'))
return chr(c)
except ValueError:
- return '&#'+ s +';'
+ return '&#' + s
else:
- # Cannot use name2codepoint directly, because HTMLParser
- # supports apos, which is not part of HTML 4
- import html.entities
- if HTMLParser.entitydefs is None:
- entitydefs = HTMLParser.entitydefs = {'apos':"'"}
- for k, v in html.entities.name2codepoint.items():
- entitydefs[k] = chr(v)
- try:
- return self.entitydefs[s]
- except KeyError:
- return '&'+s+';'
-
- return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));",
+ from html.entities import html5
+ if s in html5:
+ return html5[s]
+ elif s.endswith(';'):
+ return '&' + s
+ for x in range(2, len(s)):
+ if s[:x] in html5:
+ return html5[s[:x]] + s[x:]
+ else:
+ return '&' + s
+
+ return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))",
replaceEntities, s, flags=re.ASCII)
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 97a7155..9b01704 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -141,6 +141,9 @@ UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426
+PRECONDITION_REQUIRED = 428
+TOO_MANY_REQUESTS = 429
+REQUEST_HEADER_FIELDS_TOO_LARGE = 431
# server error
INTERNAL_SERVER_ERROR = 500
@@ -151,6 +154,7 @@ GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
+NETWORK_AUTHENTICATION_REQUIRED = 511
# Mapping status codes to official W3C names
responses = {
@@ -192,6 +196,9 @@ responses = {
415: 'Unsupported Media Type',
416: 'Requested Range Not Satisfiable',
417: 'Expectation Failed',
+ 428: 'Precondition Required',
+ 429: 'Too Many Requests',
+ 431: 'Request Header Fields Too Large',
500: 'Internal Server Error',
501: 'Not Implemented',
@@ -199,6 +206,7 @@ responses = {
503: 'Service Unavailable',
504: 'Gateway Timeout',
505: 'HTTP Version Not Supported',
+ 511: 'Network Authentication Required',
}
# maximal amount of data to read at one time in _safe_read
@@ -485,11 +493,17 @@ class HTTPResponse(io.RawIOBase):
self.close()
return b""
- if self.chunked:
- return self._read_chunked(amt)
+ if amt is not None:
+ # Amount is given, so call base class version
+ # (which is implemented in terms of self.readinto)
+ return super(HTTPResponse, self).read(amt)
+ else:
+ # Amount is not given (unbounded read) so we must check self.length
+ # and self.chunked
+
+ if self.chunked:
+ return self._readall_chunked()
- if amt is None:
- # unbounded read
if self.length is None:
s = self.fp.read()
else:
@@ -498,78 +512,127 @@ class HTTPResponse(io.RawIOBase):
self.close() # we read everything
return s
+ def readinto(self, b):
+ if self.fp is None:
+ return 0
+
+ if self._method == "HEAD":
+ self.close()
+ return 0
+
+ if self.chunked:
+ return self._readinto_chunked(b)
+
if self.length is not None:
- if amt > self.length:
+ if len(b) > self.length:
# clip the read to the "end of response"
- amt = self.length
+ b = memoryview(b)[0:self.length]
# we do not use _safe_read() here because this may be a .will_close
# connection, and the user is reading more bytes than will be provided
# (for example, reading in 1k chunks)
- s = self.fp.read(amt)
+ n = self.fp.readinto(b)
if self.length is not None:
- self.length -= len(s)
+ self.length -= n
if not self.length:
self.close()
- return s
+ return n
+
+ def _read_next_chunk_size(self):
+ # Read the next chunk size from the file
+ line = self.fp.readline(_MAXLINE + 1)
+ if len(line) > _MAXLINE:
+ raise LineTooLong("chunk size")
+ i = line.find(b";")
+ if i >= 0:
+ line = line[:i] # strip chunk-extensions
+ try:
+ return int(line, 16)
+ except ValueError:
+ # close the connection as protocol synchronisation is
+ # probably lost
+ self.close()
+ raise
- def _read_chunked(self, amt):
+ def _read_and_discard_trailer(self):
+ # read and discard trailer up to the CRLF terminator
+ ### note: we shouldn't have any trailers!
+ while True:
+ line = self.fp.readline(_MAXLINE + 1)
+ if len(line) > _MAXLINE:
+ raise LineTooLong("trailer line")
+ if not line:
+ # a vanishingly small number of sites EOF without
+ # sending the trailer
+ break
+ if line in (b'\r\n', b'\n', b''):
+ break
+
+ def _readall_chunked(self):
assert self.chunked != _UNKNOWN
chunk_left = self.chunk_left
value = []
while True:
if chunk_left is None:
- line = self.fp.readline(_MAXLINE + 1)
- if len(line) > _MAXLINE:
- raise LineTooLong("chunk size")
- i = line.find(b";")
- if i >= 0:
- line = line[:i] # strip chunk-extensions
try:
- chunk_left = int(line, 16)
+ chunk_left = self._read_next_chunk_size()
+ if chunk_left == 0:
+ break
except ValueError:
- # close the connection as protocol synchronisation is
- # probably lost
- self.close()
raise IncompleteRead(b''.join(value))
- if chunk_left == 0:
- break
- if amt is None:
- value.append(self._safe_read(chunk_left))
- elif amt < chunk_left:
- value.append(self._safe_read(amt))
- self.chunk_left = chunk_left - amt
- return b''.join(value)
- elif amt == chunk_left:
- value.append(self._safe_read(amt))
+ value.append(self._safe_read(chunk_left))
+
+ # we read the whole chunk, get another
+ self._safe_read(2) # toss the CRLF at the end of the chunk
+ chunk_left = None
+
+ self._read_and_discard_trailer()
+
+ # we read everything; close the "file"
+ self.close()
+
+ return b''.join(value)
+
+ def _readinto_chunked(self, b):
+ assert self.chunked != _UNKNOWN
+ chunk_left = self.chunk_left
+
+ total_bytes = 0
+ mvb = memoryview(b)
+ while True:
+ if chunk_left is None:
+ try:
+ chunk_left = self._read_next_chunk_size()
+ if chunk_left == 0:
+ break
+ except ValueError:
+ raise IncompleteRead(bytes(b[0:total_bytes]))
+
+ if len(mvb) < chunk_left:
+ n = self._safe_readinto(mvb)
+ self.chunk_left = chunk_left - n
+ return total_bytes + n
+ elif len(mvb) == chunk_left:
+ n = self._safe_readinto(mvb)
self._safe_read(2) # toss the CRLF at the end of the chunk
self.chunk_left = None
- return b''.join(value)
+ return total_bytes + n
else:
- value.append(self._safe_read(chunk_left))
- amt -= chunk_left
+ temp_mvb = mvb[0:chunk_left]
+ n = self._safe_readinto(temp_mvb)
+ mvb = mvb[n:]
+ total_bytes += n
# we read the whole chunk, get another
self._safe_read(2) # toss the CRLF at the end of the chunk
chunk_left = None
- # read and discard trailer up to the CRLF terminator
- ### note: we shouldn't have any trailers!
- while True:
- line = self.fp.readline(_MAXLINE + 1)
- if len(line) > _MAXLINE:
- raise LineTooLong("trailer line")
- if not line:
- # a vanishingly small number of sites EOF without
- # sending the trailer
- break
- if line in (b'\r\n', b'\n', b''):
- break
+ self._read_and_discard_trailer()
# we read everything; close the "file"
self.close()
- return b''.join(value)
+ return total_bytes
def _safe_read(self, amt):
"""Read the number of bytes requested, compensating for partial reads.
@@ -594,6 +657,22 @@ class HTTPResponse(io.RawIOBase):
amt -= len(chunk)
return b"".join(s)
+ def _safe_readinto(self, b):
+ """Same as _safe_read, but for reading into a buffer."""
+ total_bytes = 0
+ mvb = memoryview(b)
+ while total_bytes < len(b):
+ if MAXAMOUNT < len(mvb):
+ temp_mvb = mvb[0:MAXAMOUNT]
+ n = self.fp.readinto(temp_mvb)
+ else:
+ n = self.fp.readinto(mvb)
+ if not n:
+ raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
+ mvb = mvb[n:]
+ total_bytes += n
+ return total_bytes
+
def fileno(self):
return self.fp.fileno()
@@ -700,7 +779,7 @@ class HTTPConnection:
self.send(connect_bytes)
for header, value in self._tunnel_headers.items():
header_str = "%s: %s\r\n" % (header, value)
- header_bytes = header_str.encode("latin1")
+ header_bytes = header_str.encode("latin-1")
self.send(header_bytes)
self.send(b'\r\n')
@@ -943,7 +1022,7 @@ class HTTPConnection:
values = list(values)
for i, one_value in enumerate(values):
if hasattr(one_value, 'encode'):
- values[i] = one_value.encode('latin1')
+ values[i] = one_value.encode('latin-1')
elif isinstance(one_value, int):
values[i] = str(one_value).encode('ascii')
value = b'\r\n\t'.join(values)
diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py
index b6cfc35..901e762 100644
--- a/Lib/http/cookiejar.py
+++ b/Lib/http/cookiejar.py
@@ -625,7 +625,7 @@ def request_path(request):
return path
def request_port(request):
- host = request.get_host()
+ host = request.host
i = host.find(':')
if i >= 0:
port = host[i+1:]
@@ -949,7 +949,7 @@ class DefaultCookiePolicy(CookiePolicy):
return True
def set_ok_verifiability(self, cookie, request):
- if request.is_unverifiable() and is_third_party(request):
+ if request.unverifiable and is_third_party(request):
if cookie.version > 0 and self.strict_rfc2965_unverifiable:
_debug(" third-party RFC 2965 cookie during "
"unverifiable transaction")
@@ -1088,7 +1088,7 @@ class DefaultCookiePolicy(CookiePolicy):
return True
def return_ok_verifiability(self, cookie, request):
- if request.is_unverifiable() and is_third_party(request):
+ if request.unverifiable and is_third_party(request):
if cookie.version > 0 and self.strict_rfc2965_unverifiable:
_debug(" third-party RFC 2965 cookie during unverifiable "
"transaction")
@@ -1100,7 +1100,7 @@ class DefaultCookiePolicy(CookiePolicy):
return True
def return_ok_secure(self, cookie, request):
- if cookie.secure and request.get_type() != "https":
+ if cookie.secure and request.type != "https":
_debug(" secure cookie with non-secure request")
return False
return True
diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py
index ddbcbf8..d291678 100644
--- a/Lib/http/cookies.py
+++ b/Lib/http/cookies.py
@@ -159,7 +159,7 @@ class CookieError(Exception):
# _LegalChars is the list of chars which don't require "'s
# _Translator hash-table for fast quoting
#
-_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~"
+_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:"
_Translator = {
'\000' : '\\000', '\001' : '\\001', '\002' : '\\002',
'\003' : '\\003', '\004' : '\\004', '\005' : '\\005',
diff --git a/Lib/http/server.py b/Lib/http/server.py
index 5569037..c4ac703 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -100,11 +100,14 @@ import sys
import time
import urllib.parse
import copy
+import argparse
+
# Default error message template
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
+<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<title>Error response</title>
@@ -352,6 +355,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
"""
self.send_response_only(100)
+ self.flush_headers()
return True
def handle_one_request(self):
@@ -429,7 +433,8 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
self.wfile.write(content.encode('UTF-8', 'replace'))
def send_response(self, code, message=None):
- """Send the response header and log the response code.
+ """Add the response header to the headers buffer and log the
+ response code.
Also send two standard headers with the server software
version and the current date.
@@ -448,16 +453,19 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
else:
message = ''
if self.request_version != 'HTTP/0.9':
- self.wfile.write(("%s %d %s\r\n" %
- (self.protocol_version, code, message)).encode('latin1', 'strict'))
+ if not hasattr(self, '_headers_buffer'):
+ self._headers_buffer = []
+ self._headers_buffer.append(("%s %d %s\r\n" %
+ (self.protocol_version, code, message)).encode(
+ 'latin-1', 'strict'))
def send_header(self, keyword, value):
- """Send a MIME header."""
+ """Send a MIME header to the headers buffer."""
if self.request_version != 'HTTP/0.9':
if not hasattr(self, '_headers_buffer'):
self._headers_buffer = []
self._headers_buffer.append(
- ("%s: %s\r\n" % (keyword, value)).encode('latin1', 'strict'))
+ ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
if keyword.lower() == 'connection':
if value.lower() == 'close':
@@ -469,6 +477,10 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
"""Send the blank line ending the MIME headers."""
if self.request_version != 'HTTP/0.9':
self._headers_buffer.append(b"\r\n")
+ self.flush_headers()
+
+ def flush_headers(self):
+ if hasattr(self, '_headers_buffer'):
self.wfile.write(b"".join(self._headers_buffer))
self._headers_buffer = []
@@ -514,7 +526,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
"""
sys.stderr.write("%s - - [%s] %s\n" %
- (self.client_address[0],
+ (self.address_string(),
self.log_date_time_string(),
format%args))
@@ -548,15 +560,9 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
def address_string(self):
- """Return the client address formatted for logging.
-
- This version looks up the full hostname using gethostbyaddr(),
- and tries to find a name that contains at least one dot.
-
- """
+ """Return the client address."""
- host, port = self.client_address[:2]
- return socket.getfqdn(host)
+ return self.client_address[0]
# Essentially static class variables
@@ -569,7 +575,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
# Table mapping response codes to messages; entries have the
# form {code: (shortmessage, longmessage)}.
- # See RFC 2616.
+ # See RFC 2616 and 6585.
responses = {
100: ('Continue', 'Request received, please continue'),
101: ('Switching Protocols',
@@ -624,6 +630,12 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
'Cannot satisfy request range.'),
417: ('Expectation Failed',
'Expect condition could not be satisfied.'),
+ 428: ('Precondition Required',
+ 'The origin server requires the request to be conditional.'),
+ 429: ('Too Many Requests', 'The user has sent too many requests '
+ 'in a given amount of time ("rate limiting").'),
+ 431: ('Request Header Fields Too Large', 'The server is unwilling to '
+ 'process the request because its header fields are too large.'),
500: ('Internal Server Error', 'Server got itself in trouble'),
501: ('Not Implemented',
@@ -634,6 +646,8 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
504: ('Gateway Timeout',
'The gateway server did not receive a timely response'),
505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
+ 511: ('Network Authentication Required',
+ 'The client needs to authenticate to gain network access.'),
}
@@ -722,10 +736,16 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
list.sort(key=lambda a: a.lower())
r = []
displaypath = html.escape(urllib.parse.unquote(self.path))
- r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
- r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
- r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
- r.append("<hr>\n<ul>\n")
+ enc = sys.getfilesystemencoding()
+ title = 'Directory listing for %s' % displaypath
+ r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
+ '"http://www.w3.org/TR/html4/strict.dtd">')
+ r.append('<html>\n<head>')
+ r.append('<meta http-equiv="Content-Type" '
+ 'content="text/html; charset=%s">' % enc)
+ r.append('<title>%s</title>\n</head>' % title)
+ r.append('<body>\n<h1>%s</h1>' % title)
+ r.append('<hr>\n<ul>')
for name in list:
fullname = os.path.join(path, name)
displayname = linkname = name
@@ -736,11 +756,10 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
if os.path.islink(fullname):
displayname = name + "@"
# Note: a link to a directory displays with @ and links with /
- r.append('<li><a href="%s">%s</a>\n'
+ r.append('<li><a href="%s">%s</a></li>'
% (urllib.parse.quote(linkname), html.escape(displayname)))
- r.append("</ul>\n<hr>\n</body>\n</html>\n")
- enc = sys.getfilesystemencoding()
- encoded = ''.join(r).encode(enc)
+ r.append('</ul>\n<hr>\n</body>\n</html>\n')
+ encoded = '\n'.join(r).encode(enc)
f = io.BytesIO()
f.write(encoded)
f.seek(0)
@@ -888,11 +907,7 @@ def nobody_uid():
def executable(path):
"""Test for executable file."""
- try:
- st = os.stat(path)
- except os.error:
- return False
- return st.st_mode & 0o111 != 0
+ return os.access(path, os.X_OK)
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
@@ -1008,7 +1023,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
scriptname)
return
ispy = self.is_python(scriptname)
- if not ispy:
+ if self.have_fork or not ispy:
if not self.is_executable(scriptfile):
self.send_error(403, "CGI script is not executable (%r)" %
scriptname)
@@ -1029,9 +1044,6 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
env['SCRIPT_NAME'] = scriptname
if query:
env['QUERY_STRING'] = query
- host = self.address_string()
- if host != self.client_address[0]:
- env['REMOTE_HOST'] = host
env['REMOTE_ADDR'] = self.client_address[0]
authorization = self.headers.get("authorization")
if authorization:
@@ -1083,6 +1095,7 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
env.setdefault(k, "")
self.send_response(200, "Script output follows")
+ self.flush_headers()
decoded_query = query.replace('+', ' ')
@@ -1162,18 +1175,13 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):
def test(HandlerClass = BaseHTTPRequestHandler,
- ServerClass = HTTPServer, protocol="HTTP/1.0"):
+ ServerClass = HTTPServer, protocol="HTTP/1.0", port=8000):
"""Test the HTTP request handler class.
This runs an HTTP server on port 8000 (or the first command line
argument).
"""
-
- if sys.argv[1:]:
- port = int(sys.argv[1])
- else:
- port = 8000
server_address = ('', port)
HandlerClass.protocol_version = protocol
@@ -1189,4 +1197,15 @@ def test(HandlerClass = BaseHTTPRequestHandler,
sys.exit(0)
if __name__ == '__main__':
- test(HandlerClass=SimpleHTTPRequestHandler)
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--cgi', action='store_true',
+ help='Run as CGI Server')
+ parser.add_argument('port', action='store',
+ default=8000, type=int,
+ nargs='?',
+ help='Specify alternate port [default: 8000]')
+ args = parser.parse_args()
+ if args.cgi:
+ test(HandlerClass=CGIHTTPRequestHandler, port=args.port)
+ else:
+ test(HandlerClass=SimpleHTTPRequestHandler, port=args.port)
diff --git a/Lib/idlelib/AutoComplete.py b/Lib/idlelib/AutoComplete.py
index e4c1aff..929d358 100644
--- a/Lib/idlelib/AutoComplete.py
+++ b/Lib/idlelib/AutoComplete.py
@@ -9,9 +9,6 @@ import string
from idlelib.configHandler import idleConf
-# This string includes all chars that may be in a file name (without a path
-# separator)
-FILENAME_CHARS = string.ascii_letters + string.digits + os.curdir + "._~#$:-"
# This string includes all chars that may be in an identifier
ID_CHARS = string.ascii_letters + string.digits + "_"
diff --git a/Lib/idlelib/ColorDelegator.py b/Lib/idlelib/ColorDelegator.py
index e188192..e4ccb42 100644
--- a/Lib/idlelib/ColorDelegator.py
+++ b/Lib/idlelib/ColorDelegator.py
@@ -21,10 +21,11 @@ def make_pat():
# 1st 'file' colorized normal, 2nd as builtin, 3rd as string
builtin = r"([^.'\"\\#]\b|^)" + any("BUILTIN", builtinlist) + r"\b"
comment = any("COMMENT", [r"#[^\n]*"])
- sqstring = r"(\b[rRbB])?'[^'\\\n]*(\\.[^'\\\n]*)*'?"
- dqstring = r'(\b[rRbB])?"[^"\\\n]*(\\.[^"\\\n]*)*"?'
- sq3string = r"(\b[rRbB])?'''[^'\\]*((\\.|'(?!''))[^'\\]*)*(''')?"
- dq3string = r'(\b[rRbB])?"""[^"\\]*((\\.|"(?!""))[^"\\]*)*(""")?'
+ stringprefix = r"(\br|u|ur|R|U|UR|Ur|uR|b|B|br|Br|bR|BR|rb|rB|Rb|RB)?"
+ sqstring = stringprefix + r"'[^'\\\n]*(\\.[^'\\\n]*)*'?"
+ dqstring = stringprefix + r'"[^"\\\n]*(\\.[^"\\\n]*)*"?'
+ sq3string = stringprefix + r"'''[^'\\]*((\\.|'(?!''))[^'\\]*)*(''')?"
+ dq3string = stringprefix + r'"""[^"\\]*((\\.|"(?!""))[^"\\]*)*(""")?'
string = any("STRING", [sq3string, dq3string, sqstring, dqstring])
return kw + "|" + builtin + "|" + comment + "|" + string +\
"|" + any("SYNC", [r"\n"])
@@ -149,9 +150,9 @@ class ColorDelegator(Delegator):
self.stop_colorizing = False
self.colorizing = True
if DEBUG: print("colorizing...")
- t0 = time.clock()
+ t0 = time.perf_counter()
self.recolorize_main()
- t1 = time.clock()
+ t1 = time.perf_counter()
if DEBUG: print("%.3f seconds" % (t1-t0))
finally:
self.colorizing = False
diff --git a/Lib/idlelib/EditorWindow.py b/Lib/idlelib/EditorWindow.py
index bec2191..6bdcecc 100644
--- a/Lib/idlelib/EditorWindow.py
+++ b/Lib/idlelib/EditorWindow.py
@@ -1,8 +1,9 @@
-import sys
+import imp
+import importlib
import os
import re
import string
-import imp
+import sys
from tkinter import *
import tkinter.simpledialog as tkSimpleDialog
import tkinter.messagebox as tkMessageBox
@@ -27,8 +28,7 @@ def _sphinx_version():
"Format sys.version_info to produce the Sphinx version string used to install the chm docs"
major, minor, micro, level, serial = sys.version_info
release = '%s%s' % (major, minor)
- if micro:
- release += '%s' % (micro,)
+ release += '%s' % (micro,)
if level == 'candidate':
release += 'rc%s' % (serial,)
elif level != 'final':
@@ -120,7 +120,7 @@ class EditorWindow(object):
def __init__(self, flist=None, filename=None, key=None, root=None):
if EditorWindow.help_url is None:
- dochome = os.path.join(sys.prefix, 'Doc', 'index.html')
+ dochome = os.path.join(sys.base_prefix, 'Doc', 'index.html')
if sys.platform.count('linux'):
# look for html docs in a couple of standard places
pyver = 'python-docs-' + '%s.%s.%s' % sys.version_info[:3]
@@ -131,13 +131,13 @@ class EditorWindow(object):
dochome = os.path.join(basepath, pyver,
'Doc', 'index.html')
elif sys.platform[:3] == 'win':
- chmfile = os.path.join(sys.prefix, 'Doc',
+ chmfile = os.path.join(sys.base_prefix, 'Doc',
'Python%s.chm' % _sphinx_version())
if os.path.isfile(chmfile):
dochome = chmfile
elif macosxSupport.runningAsOSXApp():
# documentation is stored inside the python framework
- dochome = os.path.join(sys.prefix,
+ dochome = os.path.join(sys.base_prefix,
'Resources/English.lproj/Documentation/index.html')
dochome = os.path.normpath(dochome)
if os.path.isfile(dochome):
@@ -1005,7 +1005,10 @@ class EditorWindow(object):
def load_extension(self, name):
try:
- mod = __import__(name, globals(), locals(), [])
+ try:
+ mod = importlib.import_module('.' + name, package=__package__)
+ except ImportError:
+ mod = importlib.import_module(name)
except ImportError:
print("\nFailed to import extension: ", name)
raise
diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py
index 9528c9a..ec50eb2 100644
--- a/Lib/idlelib/IOBinding.py
+++ b/Lib/idlelib/IOBinding.py
@@ -485,6 +485,8 @@ class IOBinding:
("All files", "*"),
]
+ defaultextension = '.py' if sys.platform == 'darwin' else ''
+
def askopenfile(self):
dir, base = self.defaultfilename("open")
if not self.opendialog:
@@ -508,8 +510,10 @@ class IOBinding:
def asksavefile(self):
dir, base = self.defaultfilename("save")
if not self.savedialog:
- self.savedialog = tkFileDialog.SaveAs(master=self.text,
- filetypes=self.filetypes)
+ self.savedialog = tkFileDialog.SaveAs(
+ master=self.text,
+ filetypes=self.filetypes,
+ defaultextension=self.defaultextension)
filename = self.savedialog.show(initialdir=dir, initialfile=base)
return filename
diff --git a/Lib/idlelib/NEWS.txt b/Lib/idlelib/NEWS.txt
index 3160c74..292c5a0 100644
--- a/Lib/idlelib/NEWS.txt
+++ b/Lib/idlelib/NEWS.txt
@@ -1,10 +1,20 @@
-What's New in IDLE 3.2.4?
+What's New in IDLE 3.3.1?
+=========================
+
+- Issue #16226: Fix IDLE Path Browser crash.
+ (Patch by Roger Serwy)
+
+
+What's New in IDLE 3.3.0?
=========================
- Issue #7163: Propagate return value of sys.stdout.write.
- Issue #15318: Prevent writing to sys.stdin.
+- Issue #4832: Modify IDLE to save files with .py extension by
+ default on Windows and OS X (Tk 8.5) as it already does with X11 Tk.
+
- Issue #13532, #15319: Check that arguments to sys.stdout.write are strings.
- Issue # 12510: Attempt to get certain tool tips no longer crashes IDLE.
@@ -18,12 +28,10 @@ What's New in IDLE 3.2.4?
- Issue #14937: Perform auto-completion of filenames in strings even for
non-ASCII filenames. Likewise for identifiers.
-- Issue #14018: Update checks for unstable system Tcl/Tk versions on OS X
- to include versions shipped with OS X 10.7 and 10.8 in addition to 10.6.
-
+- Issue #8515: Set __file__ when run file in IDLE.
+ Initial patch by Bruce Frederiksen.
-What's New in IDLE 3.2.3?
-=========================
+- IDLE can be launched as `python -m idlelib`
- Issue #14409: IDLE now properly executes commands in the Shell window
when it cannot read the normal config files on startup and
@@ -33,6 +41,9 @@ What's New in IDLE 3.2.3?
- Issue #3573: IDLE hangs when passing invalid command line args
(directory(ies) instead of file(s)).
+- Issue #14018: Update checks for unstable system Tcl/Tk versions on OS X
+ to include versions shipped with OS X 10.7 and 10.8 in addition to 10.6.
+
What's New in IDLE 3.2.1?
=========================
diff --git a/Lib/idlelib/PathBrowser.py b/Lib/idlelib/PathBrowser.py
index d88a48e..55bf1aa 100644
--- a/Lib/idlelib/PathBrowser.py
+++ b/Lib/idlelib/PathBrowser.py
@@ -1,6 +1,7 @@
import os
import sys
import imp
+import importlib.machinery
from idlelib.TreeWidget import TreeItem
from idlelib.ClassBrowser import ClassBrowser, ModuleBrowserTreeItem
@@ -70,9 +71,11 @@ class DirBrowserTreeItem(TreeItem):
def listmodules(self, allnames):
modules = {}
- suffixes = imp.get_suffixes()
+ suffixes = importlib.machinery.EXTENSION_SUFFIXES[:]
+ suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
+ suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
sorted = []
- for suff, mode, flag in suffixes:
+ for suff in suffixes:
i = -len(suff)
for name in allnames[:]:
normed_name = os.path.normcase(name)
diff --git a/Lib/idlelib/PyShell.py b/Lib/idlelib/PyShell.py
index 88c0390..50d6182 100644
--- a/Lib/idlelib/PyShell.py
+++ b/Lib/idlelib/PyShell.py
@@ -477,6 +477,10 @@ class ModifiedInterpreter(InteractiveInterpreter):
def kill_subprocess(self):
try:
+ self.rpcclt.listening_sock.close()
+ except AttributeError: # no socket
+ pass
+ try:
self.rpcclt.close()
except AttributeError: # no socket
pass
@@ -1005,6 +1009,8 @@ class PyShell(OutputWindow):
return False
else:
nosub = "==== No Subprocess ===="
+ sys.displayhook = rpc.displayhook
+
self.write("Python %s on %s\n%s\n%s" %
(sys.version, sys.platform, self.COPYRIGHT, nosub))
self.showprompt()
@@ -1227,6 +1233,16 @@ class PyShell(OutputWindow):
self.set_line_and_column()
def write(self, s, tags=()):
+ if isinstance(s, str) and len(s) and max(s) > '\uffff':
+ # Tk doesn't support outputting non-BMP characters
+ # Let's assume what printed string is not very long,
+ # find first non-BMP character and construct informative
+ # UnicodeEncodeError exception.
+ for start, char in enumerate(s):
+ if char > '\uffff':
+ break
+ raise UnicodeEncodeError("UCS-2", char, start, start+1,
+ 'Non-BMP character not supported in Tk')
try:
self.text.mark_gravity("iomark", "right")
count = OutputWindow.write(self, s, tags, "iomark")
diff --git a/Lib/idlelib/ScriptBinding.py b/Lib/idlelib/ScriptBinding.py
index 18ce965..528adf6 100644
--- a/Lib/idlelib/ScriptBinding.py
+++ b/Lib/idlelib/ScriptBinding.py
@@ -150,16 +150,16 @@ class ScriptBinding:
dirname = os.path.dirname(filename)
# XXX Too often this discards arguments the user just set...
interp.runcommand("""if 1:
- _filename = %r
+ __file__ = {filename!r}
import sys as _sys
from os.path import basename as _basename
if (not _sys.argv or
- _basename(_sys.argv[0]) != _basename(_filename)):
- _sys.argv = [_filename]
+ _basename(_sys.argv[0]) != _basename(__file__)):
+ _sys.argv = [__file__]
import os as _os
- _os.chdir(%r)
- del _filename, _sys, _basename, _os
- \n""" % (filename, dirname))
+ _os.chdir({dirname!r})
+ del _sys, _basename, _os
+ \n""".format(filename=filename, dirname=dirname))
interp.prepend_syspath(filename)
# XXX KBK 03Jul04 When run w/o subprocess, runtime warnings still
# go to __stderr__. With subprocess, they go to the shell.
diff --git a/Lib/idlelib/__main__.py b/Lib/idlelib/__main__.py
new file mode 100644
index 0000000..0666f2f
--- /dev/null
+++ b/Lib/idlelib/__main__.py
@@ -0,0 +1,9 @@
+"""
+IDLE main entry point
+
+Run IDLE as python -m idlelib
+"""
+
+
+import idlelib.PyShell
+idlelib.PyShell.main()
diff --git a/Lib/idlelib/configHandler.py b/Lib/idlelib/configHandler.py
index da92726..4049004 100644
--- a/Lib/idlelib/configHandler.py
+++ b/Lib/idlelib/configHandler.py
@@ -145,7 +145,8 @@ class IdleUserConfParser(IdleConfParser):
except IOError:
os.unlink(fname)
cfgFile = open(fname, 'w')
- self.write(cfgFile)
+ with cfgFile:
+ self.write(cfgFile)
else:
self.RemoveFile()
diff --git a/Lib/idlelib/idlever.py b/Lib/idlelib/idlever.py
index 17455db..8d8317d 100644
--- a/Lib/idlelib/idlever.py
+++ b/Lib/idlelib/idlever.py
@@ -1 +1 @@
-IDLE_VERSION = "3.2.3"
+IDLE_VERSION = "3.3.0"
diff --git a/Lib/idlelib/macosxSupport.py b/Lib/idlelib/macosxSupport.py
index 9690442..67069fa 100644
--- a/Lib/idlelib/macosxSupport.py
+++ b/Lib/idlelib/macosxSupport.py
@@ -12,12 +12,22 @@ _appbundle = None
def runningAsOSXApp():
"""
Returns True if Python is running from within an app on OSX.
- If so, assume that Python was built with Aqua Tcl/Tk rather than
- X11 Tcl/Tk.
+ If so, the various OS X customizations will be triggered later (menu
+ fixup, et al). (Originally, this test was supposed to condition
+ behavior on whether IDLE was running under Aqua Tk rather than
+ under X11 Tk but that does not work since a framework build
+ could be linked with X11. For several releases, this test actually
+ differentiates between whether IDLE is running from a framework or
+ not. As a future enhancement, it should be considered whether there
+ should be a difference based on framework and any needed X11 adaptions
+ should be made dependent on a new function that actually tests for X11.)
"""
global _appbundle
if _appbundle is None:
- _appbundle = (sys.platform == 'darwin' and '.app' in sys.executable)
+ _appbundle = sys.platform == 'darwin'
+ if _appbundle:
+ import sysconfig
+ _appbundle = bool(sysconfig.get_config_var('PYTHONFRAMEWORK'))
return _appbundle
_carbonaquatk = None
diff --git a/Lib/idlelib/rpc.py b/Lib/idlelib/rpc.py
index def4394..77cb3ac 100644
--- a/Lib/idlelib/rpc.py
+++ b/Lib/idlelib/rpc.py
@@ -40,6 +40,7 @@ import traceback
import copyreg
import types
import marshal
+import builtins
def unpickle_code(ms):
@@ -196,8 +197,12 @@ class SocketIO(object):
return ("ERROR", "Unsupported message type: %s" % how)
except SystemExit:
raise
+ except KeyboardInterrupt:
+ raise
except socket.error:
raise
+ except Exception as ex:
+ return ("CALLEXC", ex)
except:
msg = "*** Internal Error: rpc.py:SocketIO.localcall()\n\n"\
" Object: %s \n Method: %s \n Args: %s\n"
@@ -257,6 +262,9 @@ class SocketIO(object):
if how == "ERROR":
self.debug("decoderesponse: Internal ERROR:", what)
raise RuntimeError(what)
+ if how == "CALLEXC":
+ self.debug("decoderesponse: Call Exception:", what)
+ raise what
raise SystemError(how, what)
def decode_interrupthook(self):
@@ -596,3 +604,21 @@ class MethodProxy(object):
# XXX KBK 09Sep03 We need a proper unit test for this module. Previously
# existing test code was removed at Rev 1.27 (r34098).
+
+def displayhook(value):
+ """Override standard display hook to use non-locale encoding"""
+ if value is None:
+ return
+ # Set '_' to None to avoid recursion
+ builtins._ = None
+ text = repr(value)
+ try:
+ sys.stdout.write(text)
+ except UnicodeEncodeError:
+ # let's use ascii while utf8-bmp codec doesn't present
+ encoding = 'ascii'
+ bytes = text.encode(encoding, 'backslashreplace')
+ text = bytes.decode(encoding, 'strict')
+ sys.stdout.write(text)
+ sys.stdout.write("\n")
+ builtins._ = value
diff --git a/Lib/idlelib/run.py b/Lib/idlelib/run.py
index 5365680..9872af4 100644
--- a/Lib/idlelib/run.py
+++ b/Lib/idlelib/run.py
@@ -7,6 +7,7 @@ import traceback
import _thread as thread
import threading
import queue
+import tkinter
from idlelib import CallTips
from idlelib import AutoComplete
@@ -39,6 +40,17 @@ else:
return s
warnings.formatwarning = idle_formatwarning_subproc
+
+tcl = tkinter.Tcl()
+
+
+def handle_tk_events(tcl=tcl):
+ """Process any tk events that are ready to be dispatched if tkinter
+ has been imported, a tcl interpreter has been created and tk has been
+ loaded."""
+ tcl.eval("update")
+
+
# Thread shared globals: Establish a queue between a subthread (which handles
# the socket) and the main thread (which runs user code), plus global
# completion, exit and interruptable (the main thread) flags:
@@ -94,6 +106,7 @@ def main(del_exitfunc=False):
try:
seq, request = rpc.request_queue.get(block=True, timeout=0.05)
except queue.Empty:
+ handle_tk_events()
continue
method, args, kwargs = request
ret = method(*args, **kwargs)
@@ -295,6 +308,7 @@ class MyHandler(rpc.RPCHandler):
sys.stdin = _RPCInputFile(self.console)
sys.stdout = _RPCOutputFile(self.get_remote_proxy("stdout"))
sys.stderr = _RPCOutputFile(self.get_remote_proxy("stderr"))
+ sys.displayhook = rpc.displayhook
# page help() text to shell.
import pydoc # import must be done here to capture i/o binding
pydoc.pager = pydoc.plainpager
diff --git a/Lib/imaplib.py b/Lib/imaplib.py
index c0334d8..e16fb95 100644
--- a/Lib/imaplib.py
+++ b/Lib/imaplib.py
@@ -23,7 +23,7 @@ Public functions: Internaldate2tuple
__version__ = "2.58"
import binascii, errno, random, re, socket, subprocess, sys, time, calendar
-
+from datetime import datetime, timezone, timedelta
try:
import ssl
HAVE_SSL = True
@@ -249,15 +249,7 @@ class IMAP4:
def read(self, size):
"""Read 'size' bytes from remote."""
- chunks = []
- read = 0
- while read < size:
- data = self.file.read(min(size-read, 4096))
- if not data:
- break
- read += len(data)
- chunks.append(data)
- return b''.join(chunks)
+ return self.file.read(size)
def readline(self):
@@ -1177,25 +1169,40 @@ if HAVE_SSL:
"""IMAP4 client class over SSL connection
- Instantiate with: IMAP4_SSL([host[, port[, keyfile[, certfile]]]])
+ Instantiate with: IMAP4_SSL([host[, port[, keyfile[, certfile[, ssl_context]]]]])
host - host's name (default: localhost);
- port - port number (default: standard IMAP4 SSL port).
+ port - port number (default: standard IMAP4 SSL port);
keyfile - PEM formatted file that contains your private key (default: None);
certfile - PEM formatted certificate chain file (default: None);
+ ssl_context - a SSLContext object that contains your certificate chain
+ and private key (default: None)
+ Note: if ssl_context is provided, then parameters keyfile or
+ certfile should not be set otherwise ValueError is thrown.
for more documentation see the docstring of the parent class IMAP4.
"""
- def __init__(self, host = '', port = IMAP4_SSL_PORT, keyfile = None, certfile = None):
+ def __init__(self, host='', port=IMAP4_SSL_PORT, keyfile=None, certfile=None, ssl_context=None):
+ if ssl_context is not None and keyfile is not None:
+ raise ValueError("ssl_context and keyfile arguments are mutually "
+ "exclusive")
+ if ssl_context is not None and certfile is not None:
+ raise ValueError("ssl_context and certfile arguments are mutually "
+ "exclusive")
+
self.keyfile = keyfile
self.certfile = certfile
+ self.ssl_context = ssl_context
IMAP4.__init__(self, host, port)
def _create_socket(self):
sock = IMAP4._create_socket(self)
- return ssl.wrap_socket(sock, self.keyfile, self.certfile)
+ if self.ssl_context:
+ return self.ssl_context.wrap_socket(sock)
+ else:
+ return ssl.wrap_socket(sock, self.keyfile, self.certfile)
def open(self, host='', port=IMAP4_SSL_PORT):
"""Setup connection to remote server on "host:port".
@@ -1306,10 +1313,8 @@ class _Authenticator:
return ''
return binascii.a2b_base64(inp)
-
-
-Mon2num = {b'Jan': 1, b'Feb': 2, b'Mar': 3, b'Apr': 4, b'May': 5, b'Jun': 6,
- b'Jul': 7, b'Aug': 8, b'Sep': 9, b'Oct': 10, b'Nov': 11, b'Dec': 12}
+Months = ' Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split(' ')
+Mon2num = {s.encode():n+1 for n, s in enumerate(Months[1:])}
def Internaldate2tuple(resp):
"""Parse an IMAP4 INTERNALDATE string.
@@ -1377,28 +1382,37 @@ def Time2Internaldate(date_time):
Return string in form: '"DD-Mmm-YYYY HH:MM:SS +HHMM"'. The
date_time argument can be a number (int or float) representing
seconds since epoch (as returned by time.time()), a 9-tuple
- representing local time (as returned by time.localtime()), or a
+ representing local time, an instance of time.struct_time (as
+ returned by time.localtime()), an aware datetime instance or a
double-quoted string. In the last case, it is assumed to already
be in the correct format.
"""
-
if isinstance(date_time, (int, float)):
- tt = time.localtime(date_time)
- elif isinstance(date_time, (tuple, time.struct_time)):
- tt = date_time
+ dt = datetime.fromtimestamp(date_time,
+ timezone.utc).astimezone()
+ elif isinstance(date_time, tuple):
+ try:
+ gmtoff = date_time.tm_gmtoff
+ except AttributeError:
+ if time.daylight:
+ dst = date_time[8]
+ if dst == -1:
+ dst = time.localtime(time.mktime(date_time))[8]
+ gmtoff = -(time.timezone, time.altzone)[dst]
+ else:
+ gmtoff = -time.timezone
+ delta = timedelta(seconds=gmtoff)
+ dt = datetime(*date_time[:6], tzinfo=timezone(delta))
+ elif isinstance(date_time, datetime):
+ if date_time.tzinfo is None:
+ raise ValueError("date_time must be aware")
+ dt = date_time
elif isinstance(date_time, str) and (date_time[0],date_time[-1]) == ('"','"'):
return date_time # Assume in correct format
else:
raise ValueError("date_time not of a known type")
-
- dt = time.strftime("%d-%b-%Y %H:%M:%S", tt)
- if dt[0] == '0':
- dt = ' ' + dt[1:]
- if time.daylight and tt[-1]:
- zone = -time.altzone
- else:
- zone = -time.timezone
- return '"' + dt + " %+03d%02d" % divmod(zone//60, 60) + '"'
+ fmt = '"%d-{}-%Y %H:%M:%S %z"'.format(Months[dt.month])
+ return dt.strftime(fmt)
diff --git a/Lib/imp.py b/Lib/imp.py
new file mode 100644
index 0000000..da9c84e
--- /dev/null
+++ b/Lib/imp.py
@@ -0,0 +1,257 @@
+"""This module provides the components needed to build your own __import__
+function. Undocumented functions are obsolete.
+
+In most cases it is preferred you consider using the importlib module's
+functionality over this module.
+
+"""
+# (Probably) need to stay in _imp
+from _imp import (lock_held, acquire_lock, release_lock,
+ load_dynamic, get_frozen_object, is_frozen_package,
+ init_builtin, init_frozen, is_builtin, is_frozen,
+ _fix_co_filename)
+
+# Directly exposed by this module
+from importlib._bootstrap import new_module
+from importlib._bootstrap import cache_from_source, source_from_cache
+
+
+from importlib import _bootstrap
+from importlib import machinery
+import os
+import sys
+import tokenize
+import warnings
+
+
+# DEPRECATED
+SEARCH_ERROR = 0
+PY_SOURCE = 1
+PY_COMPILED = 2
+C_EXTENSION = 3
+PY_RESOURCE = 4
+PKG_DIRECTORY = 5
+C_BUILTIN = 6
+PY_FROZEN = 7
+PY_CODERESOURCE = 8
+IMP_HOOK = 9
+
+
+def get_magic():
+ """Return the magic number for .pyc or .pyo files."""
+ return _bootstrap._MAGIC_BYTES
+
+
+def get_tag():
+ """Return the magic tag for .pyc or .pyo files."""
+ return sys.implementation.cache_tag
+
+
+def get_suffixes():
+ warnings.warn('imp.get_suffixes() is deprecated; use the constants '
+ 'defined on importlib.machinery instead',
+ DeprecationWarning, 2)
+ extensions = [(s, 'rb', C_EXTENSION) for s in machinery.EXTENSION_SUFFIXES]
+ source = [(s, 'U', PY_SOURCE) for s in machinery.SOURCE_SUFFIXES]
+ bytecode = [(s, 'rb', PY_COMPILED) for s in machinery.BYTECODE_SUFFIXES]
+
+ return extensions + source + bytecode
+
+
+class NullImporter:
+
+ """Null import object."""
+
+ def __init__(self, path):
+ if path == '':
+ raise ImportError('empty pathname', path='')
+ elif os.path.isdir(path):
+ raise ImportError('existing directory', path=path)
+
+ def find_module(self, fullname):
+ """Always returns None."""
+ return None
+
+
+class _HackedGetData:
+
+ """Compatibiilty support for 'file' arguments of various load_*()
+ functions."""
+
+ def __init__(self, fullname, path, file=None):
+ super().__init__(fullname, path)
+ self.file = file
+
+ def get_data(self, path):
+ """Gross hack to contort loader to deal w/ load_*()'s bad API."""
+ if self.file and path == self.path:
+ with self.file:
+ # Technically should be returning bytes, but
+ # SourceLoader.get_code() just passed what is returned to
+ # compile() which can handle str. And converting to bytes would
+ # require figuring out the encoding to decode to and
+ # tokenize.detect_encoding() only accepts bytes.
+ return self.file.read()
+ else:
+ return super().get_data(path)
+
+
+class _LoadSourceCompatibility(_HackedGetData, _bootstrap.SourceFileLoader):
+
+ """Compatibility support for implementing load_source()."""
+
+
+def load_source(name, pathname, file=None):
+ msg = ('imp.load_source() is deprecated; use '
+ 'importlib.machinery.SourceFileLoader(name, pathname).load_module()'
+ ' instead')
+ warnings.warn(msg, DeprecationWarning, 2)
+ return _LoadSourceCompatibility(name, pathname, file).load_module(name)
+
+
+class _LoadCompiledCompatibility(_HackedGetData,
+ _bootstrap.SourcelessFileLoader):
+
+ """Compatibility support for implementing load_compiled()."""
+
+
+def load_compiled(name, pathname, file=None):
+ msg = ('imp.load_compiled() is deprecated; use '
+ 'importlib.machinery.SourcelessFileLoader(name, pathname).'
+ 'load_module() instead ')
+ warnings.warn(msg, DeprecationWarning, 2)
+ return _LoadCompiledCompatibility(name, pathname, file).load_module(name)
+
+
+def load_package(name, path):
+ msg = ('imp.load_package() is deprecated; use either '
+ 'importlib.machinery.SourceFileLoader() or '
+ 'importlib.machinery.SourcelessFileLoader() instead')
+ warnings.warn(msg, DeprecationWarning, 2)
+ if os.path.isdir(path):
+ extensions = (machinery.SOURCE_SUFFIXES[:] +
+ machinery.BYTECODE_SUFFIXES[:])
+ for extension in extensions:
+ path = os.path.join(path, '__init__'+extension)
+ if os.path.exists(path):
+ break
+ else:
+ raise ValueError('{!r} is not a package'.format(path))
+ return _bootstrap.SourceFileLoader(name, path).load_module(name)
+
+
+def load_module(name, file, filename, details):
+ """**DEPRECATED**
+
+ Load a module, given information returned by find_module().
+
+ The module name must include the full package name, if any.
+
+ """
+ suffix, mode, type_ = details
+ with warnings.catch_warnings():
+ warnings.simplefilter('ignore')
+ if mode and (not mode.startswith(('r', 'U')) or '+' in mode):
+ raise ValueError('invalid file open mode {!r}'.format(mode))
+ elif file is None and type_ in {PY_SOURCE, PY_COMPILED, C_EXTENSION}:
+ msg = 'file object required for import (type code {})'.format(type_)
+ raise ValueError(msg)
+ elif type_ == PY_SOURCE:
+ return load_source(name, filename, file)
+ elif type_ == PY_COMPILED:
+ return load_compiled(name, filename, file)
+ elif type_ == C_EXTENSION:
+ return load_dynamic(name, filename, file)
+ elif type_ == PKG_DIRECTORY:
+ return load_package(name, filename)
+ elif type_ == C_BUILTIN:
+ return init_builtin(name)
+ elif type_ == PY_FROZEN:
+ return init_frozen(name)
+ else:
+ msg = "Don't know how to import {} (type code {})".format(name, type_)
+ raise ImportError(msg, name=name)
+
+
+def find_module(name, path=None):
+ """**DEPRECATED**
+
+ Search for a module.
+
+ If path is omitted or None, search for a built-in, frozen or special
+ module and continue search in sys.path. The module name cannot
+ contain '.'; to search for a submodule of a package, pass the
+ submodule name and the package's __path__.
+
+ """
+ if not isinstance(name, str):
+ raise TypeError("'name' must be a str, not {}".format(type(name)))
+ elif not isinstance(path, (type(None), list)):
+ # Backwards-compatibility
+ raise RuntimeError("'list' must be None or a list, "
+ "not {}".format(type(name)))
+
+ if path is None:
+ if is_builtin(name):
+ return None, None, ('', '', C_BUILTIN)
+ elif is_frozen(name):
+ return None, None, ('', '', PY_FROZEN)
+ else:
+ path = sys.path
+
+ for entry in path:
+ package_directory = os.path.join(entry, name)
+ for suffix in ['.py', machinery.BYTECODE_SUFFIXES[0]]:
+ package_file_name = '__init__' + suffix
+ file_path = os.path.join(package_directory, package_file_name)
+ if os.path.isfile(file_path):
+ return None, package_directory, ('', '', PKG_DIRECTORY)
+ with warnings.catch_warnings():
+ warnings.simplefilter('ignore')
+ for suffix, mode, type_ in get_suffixes():
+ file_name = name + suffix
+ file_path = os.path.join(entry, file_name)
+ if os.path.isfile(file_path):
+ break
+ else:
+ continue
+ break # Break out of outer loop when breaking out of inner loop.
+ else:
+ raise ImportError(_bootstrap._ERR_MSG.format(name), name=name)
+
+ encoding = None
+ if mode == 'U':
+ with open(file_path, 'rb') as file:
+ encoding = tokenize.detect_encoding(file.readline)[0]
+ file = open(file_path, mode, encoding=encoding)
+ return file, file_path, (suffix, mode, type_)
+
+
+_RELOADING = {}
+
+def reload(module):
+ """Reload the module and return it.
+
+ The module must have been successfully imported before.
+
+ """
+ if not module or type(module) != type(sys):
+ raise TypeError("reload() argument must be module")
+ name = module.__name__
+ if name not in sys.modules:
+ msg = "module {} not in sys.modules"
+ raise ImportError(msg.format(name), name=name)
+ if name in _RELOADING:
+ return _RELOADING[name]
+ _RELOADING[name] = module
+ try:
+ parent_name = name.rpartition('.')[0]
+ if parent_name and parent_name not in sys.modules:
+ msg = "parent {!r} not in sys.modules"
+ raise ImportError(msg.format(parentname), name=parent_name)
+ return module.__loader__.load_module(name)
+ finally:
+ try:
+ del _RELOADING[name]
+ except KeyError:
+ pass
diff --git a/Lib/importlib/__init__.py b/Lib/importlib/__init__.py
index 2baaf93..6f40dac 100644
--- a/Lib/importlib/__init__.py
+++ b/Lib/importlib/__init__.py
@@ -1,108 +1,72 @@
-"""A pure Python implementation of import.
-
-References on import:
-
- * Language reference
- http://docs.python.org/ref/import.html
- * __import__ function
- http://docs.python.org/lib/built-in-funcs.html
- * Packages
- http://www.python.org/doc/essays/packages.html
- * PEP 235: Import on Case-Insensitive Platforms
- http://www.python.org/dev/peps/pep-0235
- * PEP 275: Import Modules from Zip Archives
- http://www.python.org/dev/peps/pep-0273
- * PEP 302: New Import Hooks
- http://www.python.org/dev/peps/pep-0302/
- * PEP 328: Imports: Multi-line and Absolute/Relative
- http://www.python.org/dev/peps/pep-0328
-
-"""
-__all__ = ['__import__', 'import_module']
-
-from . import _bootstrap
-
-import os
-import re
-import tokenize
+"""A pure Python implementation of import."""
+__all__ = ['__import__', 'import_module', 'invalidate_caches']
# Bootstrap help #####################################################
-def _case_ok(directory, check):
- """Check if the directory contains something matching 'check'.
+# Until bootstrapping is complete, DO NOT import any modules that attempt
+# to import importlib._bootstrap (directly or indirectly). Since this
+# partially initialised package would be present in sys.modules, those
+# modules would get an uninitialised copy of the source version, instead
+# of a fully initialised version (either the frozen one or the one
+# initialised below if the frozen one is not available).
+import _imp # Just the builtin component, NOT the full Python module
+import sys
- No check is done if the file/directory exists or not.
+try:
+ import _frozen_importlib as _bootstrap
+except ImportError:
+ from . import _bootstrap
+ _bootstrap._setup(sys, _imp)
+else:
+ # importlib._bootstrap is the built-in import, ensure we don't create
+ # a second copy of the module.
+ _bootstrap.__name__ = 'importlib._bootstrap'
+ _bootstrap.__package__ = 'importlib'
+ _bootstrap.__file__ = __file__.replace('__init__.py', '_bootstrap.py')
+ sys.modules['importlib._bootstrap'] = _bootstrap
+
+# To simplify imports in test code
+_w_long = _bootstrap._w_long
+_r_long = _bootstrap._r_long
+
+# Fully bootstrapped at this point, import whatever you like, circular
+# dependencies and startup overhead minimisation permitting :)
- """
- if 'PYTHONCASEOK' in os.environ:
- return True
- elif check in os.listdir(directory if directory else os.getcwd()):
- return True
- return False
+# Public API #########################################################
+from ._bootstrap import __import__
-def _w_long(x):
- """Convert a 32-bit integer to little-endian.
- XXX Temporary until marshal's long functions are exposed.
def invalidate_caches():
    """Call invalidate_caches() on every finder in sys.meta_path that
    provides such a method; finders without it are skipped."""
    for meta_finder in sys.meta_path:
        if not hasattr(meta_finder, 'invalidate_caches'):
            continue
        meta_finder.invalidate_caches()
- """
- x = int(x)
- int_bytes = []
- int_bytes.append(x & 0xFF)
- int_bytes.append((x >> 8) & 0xFF)
- int_bytes.append((x >> 16) & 0xFF)
- int_bytes.append((x >> 24) & 0xFF)
- return bytearray(int_bytes)
+def find_loader(name, path=None):
+ """Find the loader for the specified module.
-def _r_long(int_bytes):
- """Convert 4 bytes in little-endian to an integer.
+ First, sys.modules is checked to see if the module was already imported. If
+ so, then sys.modules[name].__loader__ is returned. If that happens to be
+ set to None, then ValueError is raised. If the module is not in
+ sys.modules, then sys.meta_path is searched for a suitable loader with the
+ value of 'path' given to the finders. None is returned if no loader could
+ be found.
- XXX Temporary until marshal's long function are exposed.
+ Dotted names do not have their parent packages implicitly imported.
"""
- x = int_bytes[0]
- x |= int_bytes[1] << 8
- x |= int_bytes[2] << 16
- x |= int_bytes[3] << 24
- return x
-
-
-# Required built-in modules.
-try:
- import posix as _os
-except ImportError:
try:
- import nt as _os
- except ImportError:
- try:
- import os2 as _os
- except ImportError:
- raise ImportError('posix, nt, or os2 module required for importlib')
-_bootstrap._os = _os
-import imp, sys, marshal, errno, _io
-_bootstrap.imp = imp
-_bootstrap.sys = sys
-_bootstrap.marshal = marshal
-_bootstrap.errno = errno
-_bootstrap._io = _io
-import _warnings
-_bootstrap._warnings = _warnings
-
-
-from os import sep
-# For os.path.join replacement; pull from Include/osdefs.h:SEP .
-_bootstrap.path_sep = sep
-
-_bootstrap._case_ok = _case_ok
-marshal._w_long = _w_long
-marshal._r_long = _r_long
-
-
-# Public API #########################################################
-
-from ._bootstrap import __import__
+ loader = sys.modules[name].__loader__
+ if loader is None:
+ raise ValueError('{}.__loader__ is None'.format(name))
+ else:
+ return loader
+ except KeyError:
+ pass
+ return _bootstrap._find_module(name, path)
def import_module(name, package=None):
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
index 90eb1a7..26d9250 100644
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -6,33 +6,93 @@ such it requires the injection of specific modules and attributes in order to
work. One should use importlib as the public-facing version of this module.
"""
-
-# Injected modules are '_warnings', 'imp', 'sys', 'marshal', 'errno', '_io',
-# and '_os' (a.k.a. 'posix', 'nt' or 'os2').
-# Injected attribute is path_sep.
#
+# IMPORTANT: Whenever making changes to this module, be sure to run
+# a top-level make in order to get the frozen version of the module
+# updated. Not doing so will result in the Makefile failing for
+# all others who don't have a ./python around to freeze the module
+# in the early stages of compilation.
+#
+
+# See importlib._setup() for what is injected into the global namespace.
+
# When editing this code be aware that code executed at import time CANNOT
# reference any injected objects! This includes not only global code but also
# anything specified at the class level.
+# XXX Make sure all public names have no single leading underscore and all
+# others do.
+
# Bootstrap-related code ######################################################
-# XXX Could also expose Modules/getpath.c:joinpath()
-def _path_join(*args):
- """Replacement for os.path.join."""
- return path_sep.join(x[:-len(path_sep)] if x.endswith(path_sep) else x
- for x in args if x)
+_CASE_INSENSITIVE_PLATFORMS = 'win', 'cygwin', 'darwin'
-def _path_exists(path):
- """Replacement for os.path.exists."""
- try:
- _os.stat(path)
- except OSError:
- return False
+def _make_relax_case():
+ if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS):
+ def _relax_case():
+ """True if filenames must be checked case-insensitively."""
+ return b'PYTHONCASEOK' in _os.environ
else:
- return True
+ def _relax_case():
+ """True if filenames must be checked case-insensitively."""
+ return False
+ return _relax_case
+
+
+# TODO: Expose from marshal
def _w_long(x):
    """Convert a 32-bit integer to little-endian.

    Returns a 4-byte bytearray, least-significant byte first.  Only the low
    32 bits of int(x) are encoded.

    XXX Temporary until marshal's long functions are exposed.

    """
    value = int(x)
    return bytearray((value >> shift) & 0xFF for shift in (0, 8, 16, 24))
+
+
+# TODO: Expose from marshal
def _r_long(int_bytes):
    """Convert 4 bytes in little-endian to an integer.

    Only the first four items of *int_bytes* are read; item 0 is the
    least-significant byte.

    XXX Temporary until marshal's long function are exposed.

    """
    result = 0
    for index in range(4):
        result |= int_bytes[index] << (8 * index)
    return result
+
+
def _path_join(*path_parts):
    """Replacement for os.path.join().

    Empty parts are ignored.  A path_sep is inserted after every part that
    does not already end in one of path_separators; only a separator that
    this function appended after the final part is removed again, so a
    final part that itself ends in a separator (e.g. 'pkg/' or '/') is
    kept intact.
    """
    pieces = []
    appended_sep = False
    for part in path_parts:
        if not part:
            continue
        pieces.append(part)
        appended_sep = part[-1] not in path_separators
        if appended_sep:
            pieces.append(path_sep)
    if appended_sep:
        pieces.pop()  # Drop superfluous path separator.
    return ''.join(pieces)
+
+
def _path_split(path):
    """Replacement for os.path.split().

    The path is partitioned at the right-most character found in
    path_separators; when it contains none, path_sep is used as the
    partition point instead.  Returns a (front, tail) tuple.
    """
    sep = next((char for char in reversed(path) if char in path_separators),
               path_sep)
    head, _, tail = path.rpartition(sep)
    return head, tail
def _path_is_mode_type(path, mode):
@@ -58,61 +118,401 @@ def _path_isdir(path):
return _path_is_mode_type(path, 0o040000)
-def _path_without_ext(path, ext_type):
- """Replacement for os.path.splitext()[0]."""
- for suffix in _suffix_list(ext_type):
- if path.endswith(suffix):
- return path[:-len(suffix)]
- else:
- raise ValueError("path is not of the specified type")
def _write_atomic(path, data, mode=0o666):
    """Best-effort function to write data to a path atomically.

    Be prepared to handle a FileExistsError if concurrent writing of the
    temporary file is attempted.

    The data is written to a temporary file next to *path*, which is then
    moved over *path* with _os.replace().  If an OSError occurs along the
    way, removal of the temporary file is attempted and the error is
    re-raised.
    """
    # id() is used to generate a pseudo-random filename.
    tmp_path = '{}.{}'.format(path, id(path))
    # O_EXCL makes the open fail if the temporary file already exists.
    open_flags = _os.O_EXCL | _os.O_CREAT | _os.O_WRONLY
    fd = _os.open(tmp_path, open_flags, mode & 0o666)
    try:
        with _io.FileIO(fd, 'wb') as stream:
            stream.write(data)
        _os.replace(tmp_path, path)
    except OSError:
        try:
            _os.unlink(tmp_path)
        except OSError:
            # Cleanup is best-effort; the original error is what matters.
            pass
        raise
-def _path_absolute(path):
- """Replacement for os.path.abspath."""
- if not path:
- path = _os.getcwd()
def _wrap(new, old):
    """Simple substitute for functools.update_wrapper.

    Copies __module__, __name__, __qualname__ and __doc__ from *old* to
    *new* (each only if *old* has it) and merges old.__dict__ into
    new.__dict__.
    """
    for attribute in ('__module__', '__name__', '__qualname__', '__doc__'):
        if not hasattr(old, attribute):
            continue
        setattr(new, attribute, getattr(old, attribute))
    new.__dict__.update(old.__dict__)
+
+
+_code_type = type(_wrap.__code__)
+
+
def new_module(name):
    """Create a new module.

    The module is not entered into sys.modules.

    """
    # The module type is taken from the injected _io module.
    module_type = type(_io)
    return module_type(name)
+
+
+# Module-level locking ########################################################
+
+# A dict mapping module names to weakrefs of _ModuleLock instances
+_module_locks = {}
+# A dict mapping thread ids to _ModuleLock instances
+_blocking_on = {}
+
+
class _DeadlockError(RuntimeError):
    """Raised by _ModuleLock.acquire() when a potential deadlock is found."""
+
+
class _ModuleLock:
    """A recursive lock implementation which is able to detect deadlocks
    (e.g. thread 1 trying to take locks A then B, and thread 2 trying to
    take locks B then A).
    """

    def __init__(self, name):
        self.name = name
        # Thread id of the current holder, and how many times it holds it.
        self.owner = None
        self.count = 0
        # Number of pending wake-ups owed by release().
        self.waiters = 0
        # 'lock' guards the fields above; 'wakeup' is what acquire() waits on.
        self.lock = _thread.allocate_lock()
        self.wakeup = _thread.allocate_lock()

    def has_deadlock(self):
        """Return True if following the owner -> blocked-on-lock chain from
        this lock's owner leads back to the calling thread."""
        # Deadlock avoidance for concurrent circular imports.
        current = _thread.get_ident()
        holder = self.owner
        while True:
            blocked_on = _blocking_on.get(holder)
            if blocked_on is None:
                return False
            holder = blocked_on.owner
            if holder == current:
                return True

    def acquire(self):
        """
        Acquire the module lock.  If a potential deadlock is detected,
        a _DeadlockError is raised.
        Otherwise, the lock is always acquired and True is returned.
        """
        ident = _thread.get_ident()
        # Advertise what this thread waits on so has_deadlock() can see it.
        _blocking_on[ident] = self
        try:
            while True:
                with self.lock:
                    if self.count == 0 or self.owner == ident:
                        self.owner = ident
                        self.count += 1
                        return True
                    if self.has_deadlock():
                        raise _DeadlockError("deadlock detected by %r" % self)
                    if self.wakeup.acquire(False):
                        self.waiters += 1
                # Wait for a release() call
                self.wakeup.acquire()
                self.wakeup.release()
        finally:
            del _blocking_on[ident]

    def release(self):
        """Undo one acquire(); raise RuntimeError if the calling thread is
        not the owner."""
        ident = _thread.get_ident()
        with self.lock:
            if self.owner != ident:
                raise RuntimeError("cannot release un-acquired lock")
            assert self.count > 0
            self.count -= 1
            if self.count == 0:
                self.owner = None
                if self.waiters:
                    self.waiters -= 1
                    self.wakeup.release()

    def __repr__(self):
        return "_ModuleLock(%r) at %d" % (self.name, id(self))
+
+
class _DummyModuleLock:
    """A simple _ModuleLock equivalent for Python builds without
    multi-threading support.

    It only counts acquisitions; nothing ever blocks.
    """

    def __init__(self, name):
        self.count = 0
        self.name = name

    def acquire(self):
        """Record one more acquisition and return True."""
        self.count = self.count + 1
        return True

    def release(self):
        """Undo one acquire(); raise RuntimeError if none is outstanding."""
        if self.count != 0:
            self.count -= 1
            return
        raise RuntimeError("cannot release un-acquired lock")

    def __repr__(self):
        return "_DummyModuleLock(%r) at %d" % (self.name, id(self))
+
+
+# The following two functions are for consumption by Python/import.c.
+
+def _get_module_lock(name):
+ """Get or create the module lock for a given module name.
+
+ Should only be called with the import lock taken."""
+ lock = None
try:
- return _os._getfullpathname(path)
- except AttributeError:
- if path.startswith('/'):
- return path
+ lock = _module_locks[name]()
+ except KeyError:
+ pass
+ if lock is None:
+ if _thread is None:
+ lock = _DummyModuleLock(name)
else:
- return _path_join(_os.getcwd(), path)
+ lock = _ModuleLock(name)
+ def cb(_):
+ del _module_locks[name]
+ _module_locks[name] = _weakref.ref(lock, cb)
+ return lock
+
def _lock_unlock_module(name):
    """Release the global import lock, then acquire and release the
    module lock for a given module name.
    This is used to ensure a module is completely initialized, in the
    event it is being imported by another thread.

    Should only be called with the import lock taken."""
    module_lock = _get_module_lock(name)
    _imp.release_lock()
    try:
        module_lock.acquire()
    except _DeadlockError:
        # Concurrent circular import, we'll accept a partially initialized
        # module object.
        return
    module_lock.release()
+# Frame stripping magic ###############################################
-def _wrap(new, old):
- """Simple substitute for functools.wraps."""
- for replace in ['__module__', '__name__', '__doc__']:
- setattr(new, replace, getattr(old, replace))
- new.__dict__.update(old.__dict__)
def _call_with_frames_removed(f, *args, **kwds):
    """Call f(*args, **kwds) and return its result.

    remove_importlib_frames in import.c will always remove sequences
    of importlib frames that end with a call to this function.

    Use it instead of a normal call in places where including the importlib
    frames introduces unwanted noise into the traceback (e.g. when executing
    module code).
    """
    result = f(*args, **kwds)
    return result
+
+
+# Finder/loader utility code ###############################################
+
+"""Magic word to reject .pyc files generated by other Python versions.
+It should change for each incompatible change to the bytecode.
+
+The value of CR and LF is incorporated so if you ever read or write
+a .pyc file in text mode the magic number will be wrong; also, the
+Apple MPW compiler swaps their values, botching string constants.
+
+The magic numbers must be spaced apart at least 2 values, as the
+-U interpreter flag will cause MAGIC+1 to be used. They have been
+odd numbers for some time now.
+
+There were a variety of old schemes for setting the magic number.
+The current working scheme is to increment the previous value by
+10.
+
+Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic
+number also includes a new "magic tag", i.e. a human readable string used
+to represent the magic number in __pycache__ directories. When you change
+the magic number, you must also set a new unique magic tag. Generally this
+can be named after the Python major version of the magic number bump, but
+it can really be anything, as long as it's different than anything else
+that's come before. The tags are included in the following table, starting
+with Python 3.2a0.
+
+Known values:
+ Python 1.5: 20121
+ Python 1.5.1: 20121
+ Python 1.5.2: 20121
+ Python 1.6: 50428
+ Python 2.0: 50823
+ Python 2.0.1: 50823
+ Python 2.1: 60202
+ Python 2.1.1: 60202
+ Python 2.1.2: 60202
+ Python 2.2: 60717
+ Python 2.3a0: 62011
+ Python 2.3a0: 62021
+ Python 2.3a0: 62011 (!)
+ Python 2.4a0: 62041
+ Python 2.4a3: 62051
+ Python 2.4b1: 62061
+ Python 2.5a0: 62071
+ Python 2.5a0: 62081 (ast-branch)
+ Python 2.5a0: 62091 (with)
+ Python 2.5a0: 62092 (changed WITH_CLEANUP opcode)
+ Python 2.5b3: 62101 (fix wrong code: for x, in ...)
+ Python 2.5b3: 62111 (fix wrong code: x += yield)
+ Python 2.5c1: 62121 (fix wrong lnotab with for loops and
+ storing constants that should have been removed)
+ Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp)
+ Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode)
+ Python 2.6a1: 62161 (WITH_CLEANUP optimization)
+ Python 3000: 3000
+ 3010 (removed UNARY_CONVERT)
+ 3020 (added BUILD_SET)
+ 3030 (added keyword-only parameters)
+ 3040 (added signature annotations)
+ 3050 (print becomes a function)
+ 3060 (PEP 3115 metaclass syntax)
+ 3061 (string literals become unicode)
+ 3071 (PEP 3109 raise changes)
+ 3081 (PEP 3137 make __file__ and __name__ unicode)
+ 3091 (kill str8 interning)
+ 3101 (merge from 2.6a0, see 62151)
+ 3103 (__file__ points to source file)
+ Python 3.0a4: 3111 (WITH_CLEANUP optimization).
+ Python 3.0a5: 3131 (lexical exception stacking, including POP_EXCEPT)
+ Python 3.1a0: 3141 (optimize list, set and dict comprehensions:
+ change LIST_APPEND and SET_ADD, add MAP_ADD)
+ Python 3.1a0: 3151 (optimize conditional branches:
+ introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
+ Python 3.2a0: 3160 (add SETUP_WITH)
+ tag: cpython-32
+ Python 3.2a1: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR)
+ tag: cpython-32
+ Python 3.2a2 3180 (add DELETE_DEREF)
+ Python 3.3a0 3190 __class__ super closure changed
+ Python 3.3a0 3200 (__qualname__ added)
+ 3210 (added size modulo 2**32 to the pyc header)
+ Python 3.3a1 3220 (changed PEP 380 implementation)
+ Python 3.3a4 3230 (revert changes to implicit __class__ closure)
+
+MAGIC must change whenever the bytecode emitted by the compiler may no
+longer be understood by older implementations of the eval loop (usually
+due to the addition of new opcodes).
+
+"""
+_RAW_MAGIC_NUMBER = 3230 | ord('\r') << 16 | ord('\n') << 24
+_MAGIC_BYTES = bytes(_RAW_MAGIC_NUMBER >> n & 0xff for n in range(0, 25, 8))
+
+_PYCACHE = '__pycache__'
+
+SOURCE_SUFFIXES = ['.py'] # _setup() adds .pyw as needed.
+
+DEBUG_BYTECODE_SUFFIXES = ['.pyc']
+OPTIMIZED_BYTECODE_SUFFIXES = ['.pyo']
+
def cache_from_source(path, debug_override=None):
    """Given the path to a .py file, return the path to its .pyc/.pyo file.

    The .py file does not need to exist; this simply returns the path to the
    .pyc/.pyo file calculated as if the .py file were imported.  The extension
    will be .pyc unless sys.flags.optimize is non-zero, then it will be .pyo.

    If debug_override is not None, then it must be a boolean and is used in
    place of sys.flags.optimize.

    Only the part of the file name before its first '.' is carried over into
    the cache file name.

    If sys.implementation.cache_tag is None then NotImplementedError is raised.

    """
    if debug_override is None:
        debug = not sys.flags.optimize
    else:
        debug = debug_override
    suffixes = DEBUG_BYTECODE_SUFFIXES if debug else OPTIMIZED_BYTECODE_SUFFIXES
    directory, source_name = _path_split(path)
    base_filename, dot, _ = source_name.partition('.')
    cache_tag = sys.implementation.cache_tag
    if cache_tag is None:
        raise NotImplementedError('sys.implementation.cache_tag is None')
    cache_name = ''.join([base_filename, dot, cache_tag, suffixes[0]])
    return _path_join(directory, _PYCACHE, cache_name)
+
+
def source_from_cache(path):
    """Given the path to a .pyc/.pyo file, return the path to its .py file.

    The .pyc/.pyo file does not need to exist; this simply returns the path to
    the .py file calculated to correspond to the .pyc/.pyo file.  If path does
    not conform to PEP 3147 format (a file directly inside a __pycache__
    directory whose name contains exactly two dots), ValueError will be
    raised.  If sys.implementation.cache_tag is None then NotImplementedError
    is raised.

    """
    if sys.implementation.cache_tag is None:
        raise NotImplementedError('sys.implementation.cache_tag is None')
    cache_dir, pycache_filename = _path_split(path)
    source_dir, pycache = _path_split(cache_dir)
    if pycache != _PYCACHE:
        raise ValueError('{} not bottom-level directory in '
                         '{!r}'.format(_PYCACHE, path))
    if pycache_filename.count('.') != 2:
        raise ValueError('expected only 2 dots in '
                         '{!r}'.format(pycache_filename))
    module_name, _, _ = pycache_filename.partition('.')
    source_name = module_name + SOURCE_SUFFIXES[0]
    return _path_join(source_dir, source_name)
+
+
def _get_sourcefile(bytecode_path):
    """Convert a bytecode file path to a source path (if possible).

    This function exists purely for backwards-compatibility for
    PyImport_ExecCodeModuleWithFilenames() in the C API.

    Returns None for an empty path.  Returns *bytecode_path* unchanged when
    it has no extension of the form 'py?' (e.g. 'pyc', 'pyo') or when the
    computed source path is not an existing file; otherwise returns the
    source path.

    """
    if not bytecode_path:
        return None
    rest, _, extension = bytecode_path.rpartition('.')
    # rpartition() strips the dot, so compare against 'py', not '.py'.
    if not rest or extension.lower()[-3:-1] != 'py':
        return bytecode_path

    try:
        source_path = source_from_cache(bytecode_path)
    except (NotImplementedError, ValueError):
        # Not a PEP 3147 path: fall back to dropping the trailing 'c'/'o'.
        source_path = bytecode_path[:-1]

    return source_path if _path_isfile(source_path) else bytecode_path
-code_type = type(_wrap.__code__)
def _verbose_message(message, *args):
    """Print the message to stderr if -v/PYTHONVERBOSE is turned on.

    The message is prefixed with '# ' unless it already starts with '#' or
    'import ', and is then formatted with *args via str.format().
    """
    if not sys.flags.verbose:
        return
    if not message.startswith(('#', 'import ')):
        message = '# ' + message
    print(message.format(*args), file=sys.stderr)
-# Finder/loader utility code ##################################################
def set_package(fxn):
"""Set __package__ on the returned module."""
- def wrapper(*args, **kwargs):
+ def set_package_wrapper(*args, **kwargs):
module = fxn(*args, **kwargs)
- if not hasattr(module, '__package__') or module.__package__ is None:
+ if getattr(module, '__package__', None) is None:
module.__package__ = module.__name__
if not hasattr(module, '__path__'):
module.__package__ = module.__package__.rpartition('.')[0]
return module
- _wrap(wrapper, fxn)
- return wrapper
+ _wrap(set_package_wrapper, fxn)
+ return set_package_wrapper
def set_loader(fxn):
"""Set __loader__ on the returned module."""
- def wrapper(self, *args, **kwargs):
+ def set_loader_wrapper(self, *args, **kwargs):
module = fxn(self, *args, **kwargs)
if not hasattr(module, '__loader__'):
module.__loader__ = self
return module
- _wrap(wrapper, fxn)
- return wrapper
+ _wrap(set_loader_wrapper, fxn)
+ return set_loader_wrapper
def module_for_loader(fxn):
@@ -120,31 +520,54 @@ def module_for_loader(fxn):
The decorated function is passed the module to use instead of the module
name. The module passed in to the function is either from sys.modules if
- it already exists or is a new module which has __name__ set and is inserted
- into sys.modules. If an exception is raised and the decorator created the
- module it is subsequently removed from sys.modules.
+ it already exists or is a new module. If the module is new, then __name__
+ is set the first argument to the method, __loader__ is set to self, and
+ __package__ is set accordingly (if self.is_package() is defined) will be set
+ before it is passed to the decorated function (if self.is_package() does
+ not work for the module it will be set post-load).
+
+ If an exception is raised and the decorator created the module it is
+ subsequently removed from sys.modules.
The decorator assumes that the decorated function takes the module name as
the second argument.
"""
- def decorated(self, fullname, *args, **kwargs):
+ def module_for_loader_wrapper(self, fullname, *args, **kwargs):
module = sys.modules.get(fullname)
- is_reload = bool(module)
+ is_reload = module is not None
if not is_reload:
# This must be done before open() is called as the 'io' module
# implicitly imports 'locale' and would otherwise trigger an
# infinite loop.
- module = imp.new_module(fullname)
+ module = new_module(fullname)
+ # This must be done before putting the module in sys.modules
+ # (otherwise an optimization shortcut in import.c becomes wrong)
+ module.__initializing__ = True
sys.modules[fullname] = module
+ module.__loader__ = self
+ try:
+ is_package = self.is_package(fullname)
+ except (ImportError, AttributeError):
+ pass
+ else:
+ if is_package:
+ module.__package__ = fullname
+ else:
+ module.__package__ = fullname.rpartition('.')[0]
+ else:
+ module.__initializing__ = True
try:
+ # If __package__ was not set above, __import__() will do it later.
return fxn(self, module, *args, **kwargs)
except:
if not is_reload:
del sys.modules[fullname]
raise
- _wrap(decorated, fxn)
- return decorated
+ finally:
+ module.__initializing__ = False
+ _wrap(module_for_loader_wrapper, fxn)
+ return module_for_loader_wrapper
def _check_name(method):
@@ -155,38 +578,51 @@ def _check_name(method):
compared against. If the comparison fails then ImportError is raised.
"""
- def inner(self, name, *args, **kwargs):
- if self._name != name:
- raise ImportError("loader cannot handle %s" % name)
+ def _check_name_wrapper(self, name=None, *args, **kwargs):
+ if name is None:
+ name = self.name
+ elif self.name != name:
+ raise ImportError("loader cannot handle %s" % name, name=name)
return method(self, name, *args, **kwargs)
- _wrap(inner, method)
- return inner
+ _wrap(_check_name_wrapper, method)
+ return _check_name_wrapper
def _requires_builtin(fxn):
"""Decorator to verify the named module is built-in."""
- def wrapper(self, fullname):
+ def _requires_builtin_wrapper(self, fullname):
if fullname not in sys.builtin_module_names:
- raise ImportError("{0} is not a built-in module".format(fullname))
+ raise ImportError("{} is not a built-in module".format(fullname),
+ name=fullname)
return fxn(self, fullname)
- _wrap(wrapper, fxn)
- return wrapper
+ _wrap(_requires_builtin_wrapper, fxn)
+ return _requires_builtin_wrapper
def _requires_frozen(fxn):
"""Decorator to verify the named module is frozen."""
- def wrapper(self, fullname):
- if not imp.is_frozen(fullname):
- raise ImportError("{0} is not a frozen module".format(fullname))
+ def _requires_frozen_wrapper(self, fullname):
+ if not _imp.is_frozen(fullname):
+ raise ImportError("{} is not a frozen module".format(fullname),
+ name=fullname)
return fxn(self, fullname)
- _wrap(wrapper, fxn)
- return wrapper
+ _wrap(_requires_frozen_wrapper, fxn)
+ return _requires_frozen_wrapper
+
+
def _find_module_shim(self, fullname):
    """Try to find a loader for the specified module by delegating to
    self.find_loader().

    The loader returned by find_loader() is passed through.  When no loader
    was found but find_loader() reported portions (a directory without
    __init__), an ImportWarning is issued and None is returned.
    """
    loader, portions = self.find_loader(fullname)
    if loader is not None or not len(portions):
        return loader
    msg = "Not importing directory {}: missing __init__"
    _warnings.warn(msg.format(portions[0]), ImportWarning)
    return loader
-def _suffix_list(suffix_type):
- """Return a list of file suffixes based on the imp file type."""
- return [suffix[0] for suffix in imp.get_suffixes()
- if suffix[2] == suffix_type]
# Loaders #####################################################################
@@ -201,6 +637,10 @@ class BuiltinImporter:
"""
@classmethod
+ def module_repr(cls, module):
+ return "<module '{}' (built-in)>".format(module.__name__)
+
+ @classmethod
def find_module(cls, fullname, path=None):
"""Find the built-in module.
@@ -209,7 +649,7 @@ class BuiltinImporter:
"""
if path is not None:
return None
- return cls if imp.is_builtin(fullname) else None
+ return cls if _imp.is_builtin(fullname) else None
@classmethod
@set_package
@@ -219,7 +659,7 @@ class BuiltinImporter:
"""Load a built-in module."""
is_reload = fullname in sys.modules
try:
- return imp.init_builtin(fullname)
+ return _call_with_frames_removed(_imp.init_builtin, fullname)
except:
if not is_reload and fullname in sys.modules:
del sys.modules[fullname]
@@ -240,7 +680,7 @@ class BuiltinImporter:
@classmethod
@_requires_builtin
def is_package(cls, fullname):
- """Return None as built-in module are never packages."""
+ """Return False as built-in modules are never packages."""
return False
@@ -254,9 +694,13 @@ class FrozenImporter:
"""
@classmethod
+ def module_repr(cls, m):
+ return "<module '{}' (frozen)>".format(m.__name__)
+
+ @classmethod
def find_module(cls, fullname, path=None):
"""Find a frozen module."""
- return cls if imp.is_frozen(fullname) else None
+ return cls if _imp.is_frozen(fullname) else None
@classmethod
@set_package
@@ -266,7 +710,10 @@ class FrozenImporter:
"""Load a frozen module."""
is_reload = fullname in sys.modules
try:
- return imp.init_frozen(fullname)
+ m = _call_with_frames_removed(_imp.init_frozen, fullname)
+ # Let our own module_repr() method produce a suitable repr.
+ del m.__file__
+ return m
except:
if not is_reload and fullname in sys.modules:
del sys.modules[fullname]
@@ -276,7 +723,7 @@ class FrozenImporter:
@_requires_frozen
def get_code(cls, fullname):
"""Return the code object for the frozen module."""
- return imp.get_frozen_object(fullname)
+ return _imp.get_frozen_object(fullname)
@classmethod
@_requires_frozen
@@ -287,40 +734,117 @@ class FrozenImporter:
@classmethod
@_requires_frozen
def is_package(cls, fullname):
- """Return if the frozen module is a package."""
- return imp.is_frozen_package(fullname)
+ """Return True if the frozen module is a package."""
+ return _imp.is_frozen_package(fullname)
+
+
class WindowsRegistryFinder:

    """Meta path finder for modules declared in the Windows registry.

    The module's file path is read from the default value of a per-version
    registry key, looked up first under HKEY_CURRENT_USER and then under
    HKEY_LOCAL_MACHINE.
    """

    REGISTRY_KEY = (
        "Software\\Python\\PythonCore\\{sys_version}"
        "\\Modules\\{fullname}")
    REGISTRY_KEY_DEBUG = (
        "Software\\Python\\PythonCore\\{sys_version}"
        "\\Modules\\{fullname}\\Debug")
    DEBUG_BUILD = False  # Changed in _setup()

    @classmethod
    def _open_registry(cls, key):
        """Open *key* under HKEY_CURRENT_USER, falling back to
        HKEY_LOCAL_MACHINE if that fails with OSError."""
        try:
            return _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, key)
        except OSError:
            return _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, key)

    @classmethod
    def _search_registry(cls, fullname):
        """Return the file path registered for *fullname*, or None if the
        registry lookup fails."""
        if cls.DEBUG_BUILD:
            registry_key = cls.REGISTRY_KEY_DEBUG
        else:
            registry_key = cls.REGISTRY_KEY
        # Build "major.minor" from version_info: slicing sys.version to three
        # characters truncates two-digit minor versions ('3.10' -> '3.1').
        sys_version = '%d.%d' % sys.version_info[:2]
        key = registry_key.format(fullname=fullname, sys_version=sys_version)
        try:
            with cls._open_registry(key) as hkey:
                filepath = _winreg.QueryValue(hkey, "")
        except OSError:
            return None
        return filepath

    @classmethod
    def find_module(cls, fullname, path=None):
        """Find module named in the registry.

        Returns a loader for the registered file, or None if nothing is
        registered, the file cannot be stat()ed, or no supported loader
        matches its suffix.
        """
        filepath = cls._search_registry(fullname)
        if filepath is None:
            return None
        try:
            _os.stat(filepath)
        except OSError:
            return None
        for loader, suffixes, _ in _get_supported_file_loaders():
            if filepath.endswith(tuple(suffixes)):
                return loader(fullname, filepath)
        return None
class _LoaderBasics:
"""Base class of common code needed by both SourceLoader and
- _SourcelessFileLoader."""
+ SourcelessFileLoader."""
def is_package(self, fullname):
"""Concrete implementation of InspectLoader.is_package by checking if
the path returned by get_filename has a filename of '__init__.py'."""
- filename = self.get_filename(fullname).rpartition(path_sep)[2]
- return filename.rsplit('.', 1)[0] == '__init__'
+ filename = _path_split(self.get_filename(fullname))[1]
+ filename_base = filename.rsplit('.', 1)[0]
+ tail_name = fullname.rpartition('.')[2]
+ return filename_base == '__init__' and tail_name != '__init__'
- def _bytes_from_bytecode(self, fullname, data, source_mtime):
+ def _bytes_from_bytecode(self, fullname, data, bytecode_path, source_stats):
"""Return the marshalled bytes from bytecode, verifying the magic
- number and timestamp along the way.
+ number, timestamp and source size along the way.
- If source_mtime is None then skip the timestamp check.
+ If source_stats is None then skip the timestamp check.
"""
magic = data[:4]
raw_timestamp = data[4:8]
- if len(magic) != 4 or magic != imp.get_magic():
- raise ImportError("bad magic number in {}".format(fullname))
+ raw_size = data[8:12]
+ if magic != _MAGIC_BYTES:
+ msg = 'bad magic number in {!r}: {!r}'.format(fullname, magic)
+ raise ImportError(msg, name=fullname, path=bytecode_path)
elif len(raw_timestamp) != 4:
- raise EOFError("bad timestamp in {}".format(fullname))
- elif source_mtime is not None:
- if marshal._r_long(raw_timestamp) != source_mtime:
- raise ImportError("bytecode is stale for {}".format(fullname))
+ message = 'bad timestamp in {}'.format(fullname)
+ _verbose_message(message)
+ raise EOFError(message)
+ elif len(raw_size) != 4:
+ message = 'bad size in {}'.format(fullname)
+ _verbose_message(message)
+ raise EOFError(message)
+ if source_stats is not None:
+ try:
+ source_mtime = int(source_stats['mtime'])
+ except KeyError:
+ pass
+ else:
+ if _r_long(raw_timestamp) != source_mtime:
+ message = 'bytecode is stale for {}'.format(fullname)
+ _verbose_message(message)
+ raise ImportError(message, name=fullname,
+ path=bytecode_path)
+ try:
+ source_size = source_stats['size'] & 0xFFFFFFFF
+ except KeyError:
+ pass
+ else:
+ if _r_long(raw_size) != source_size:
+ raise ImportError(
+ "bytecode is stale for {}".format(fullname),
+ name=fullname, path=bytecode_path)
# Can't return the code object as errors from marshal loading need to
# propagate even when source is available.
- return data[8:]
+ return data[12:]
@module_for_loader
def _load_module(self, module, *, sourceless=False):
@@ -330,16 +854,19 @@ class _LoaderBasics:
code_object = self.get_code(name)
module.__file__ = self.get_filename(name)
if not sourceless:
- module.__cached__ = imp.cache_from_source(module.__file__)
+ try:
+ module.__cached__ = cache_from_source(module.__file__)
+ except NotImplementedError:
+ module.__cached__ = module.__file__
else:
module.__cached__ = module.__file__
module.__package__ = name
if self.is_package(name):
- module.__path__ = [module.__file__.rsplit(path_sep, 1)[0]]
+ module.__path__ = [_path_split(module.__file__)[0]]
else:
module.__package__ = module.__package__.rpartition('.')[0]
module.__loader__ = self
- exec(code_object, module.__dict__)
+ _call_with_frames_removed(exec, code_object, module.__dict__)
return module
@@ -348,11 +875,30 @@ class SourceLoader(_LoaderBasics):
def path_mtime(self, path):
"""Optional method that returns the modification time (an int) for the
specified path, where path is a str.
+ """
+ raise NotImplementedError
+
+ def path_stats(self, path):
+ """Optional method returning a metadata dict for the specified path
+ (str).
+ Possible keys:
+ - 'mtime' (mandatory) is the numeric timestamp of last source
+ code modification;
+ - 'size' (optional) is the size in bytes of the source code.
Implementing this method allows the loader to read bytecode files.
+ """
+ return {'mtime': self.path_mtime(path)}
+ def _cache_bytecode(self, source_path, cache_path, data):
+ """Optional method which writes data (bytes) to a file path (a str).
+
+ Implementing this method allows for the writing of bytecode files.
+
+ The source path is needed in order to correctly transfer permissions
"""
- raise NotImplementedError
+ # For backwards compatibility, we delegate to set_data()
+ return self.set_data(cache_path, data)
def set_data(self, path, data):
"""Optional method which writes data (bytes) to a file path (a str).
@@ -369,28 +915,42 @@ class SourceLoader(_LoaderBasics):
path = self.get_filename(fullname)
try:
source_bytes = self.get_data(path)
- except IOError:
- raise ImportError("source not available through get_data()")
- encoding = tokenize.detect_encoding(_io.BytesIO(source_bytes).readline)
+ except IOError as exc:
+ raise ImportError("source not available through get_data()",
+ name=fullname) from exc
+ readsource = _io.BytesIO(source_bytes).readline
+ try:
+ encoding = tokenize.detect_encoding(readsource)
+ except SyntaxError as exc:
+ raise ImportError("Failed to detect encoding",
+ name=fullname) from exc
newline_decoder = _io.IncrementalNewlineDecoder(None, True)
- return newline_decoder.decode(source_bytes.decode(encoding[0]))
+ try:
+ return newline_decoder.decode(source_bytes.decode(encoding[0]))
+ except UnicodeDecodeError as exc:
+ raise ImportError("Failed to decode source file",
+ name=fullname) from exc
def get_code(self, fullname):
"""Concrete implementation of InspectLoader.get_code.
- Reading of bytecode requires path_mtime to be implemented. To write
+ Reading of bytecode requires path_stats to be implemented. To write
bytecode, set_data must also be implemented.
"""
source_path = self.get_filename(fullname)
- bytecode_path = imp.cache_from_source(source_path)
source_mtime = None
- if bytecode_path is not None:
+ try:
+ bytecode_path = cache_from_source(source_path)
+ except NotImplementedError:
+ bytecode_path = None
+ else:
try:
- source_mtime = self.path_mtime(source_path)
+ st = self.path_stats(source_path)
except NotImplementedError:
pass
else:
+ source_mtime = int(st['mtime'])
try:
data = self.get_data(bytecode_path)
except IOError:
@@ -398,29 +958,37 @@ class SourceLoader(_LoaderBasics):
else:
try:
bytes_data = self._bytes_from_bytecode(fullname, data,
- source_mtime)
+ bytecode_path,
+ st)
except (ImportError, EOFError):
pass
else:
+ _verbose_message('{} matches {}', bytecode_path,
+ source_path)
found = marshal.loads(bytes_data)
- if isinstance(found, code_type):
+ if isinstance(found, _code_type):
+ _imp._fix_co_filename(found, source_path)
+ _verbose_message('code object from {}',
+ bytecode_path)
return found
else:
msg = "Non-code object in {}"
- raise ImportError(msg.format(bytecode_path))
+ raise ImportError(msg.format(bytecode_path),
+ name=fullname, path=bytecode_path)
source_bytes = self.get_data(source_path)
- code_object = compile(source_bytes, source_path, 'exec',
- dont_inherit=True)
+ code_object = _call_with_frames_removed(compile,
+ source_bytes, source_path, 'exec',
+ dont_inherit=True)
+ _verbose_message('code object from {}', source_path)
if (not sys.dont_write_bytecode and bytecode_path is not None and
- source_mtime is not None):
- # If e.g. Jython ever implements imp.cache_from_source to have
- # their own cached file format, this block of code will most likely
- # throw an exception.
- data = bytearray(imp.get_magic())
- data.extend(marshal._w_long(source_mtime))
+ source_mtime is not None):
+ data = bytearray(_MAGIC_BYTES)
+ data.extend(_w_long(source_mtime))
+ data.extend(_w_long(len(source_bytes)))
data.extend(marshal.dumps(code_object))
try:
- self.set_data(bytecode_path, data)
+ self._cache_bytecode(source_path, bytecode_path, data)
+ _verbose_message('wrote {!r}', bytecode_path)
except NotImplementedError:
pass
return code_object
@@ -436,7 +1004,7 @@ class SourceLoader(_LoaderBasics):
return self._load_module(fullname)
-class _FileLoader:
+class FileLoader:
"""Base file loader class which implements the loader protocol methods that
require file system usage."""
@@ -444,13 +1012,20 @@ class _FileLoader:
def __init__(self, fullname, path):
"""Cache the module name and the path to the file found by the
finder."""
- self._name = fullname
- self._path = path
+ self.name = fullname
+ self.path = path
+
+ @_check_name
+ def load_module(self, fullname):
+ """Load a module from a file."""
+ # Issue #14857: Avoid the zero-argument form so the implementation
+ # of that form can be updated without breaking the frozen module
+ return super(FileLoader, self).load_module(fullname)
@_check_name
def get_filename(self, fullname):
"""Return the path to the source file as found by the finder."""
- return self._path
+ return self.path
def get_data(self, path):
"""Return the data from path as raw bytes."""
@@ -458,52 +1033,56 @@ class _FileLoader:
return file.read()
-class _SourceFileLoader(_FileLoader, SourceLoader):
+class SourceFileLoader(FileLoader, SourceLoader):
"""Concrete implementation of SourceLoader using the file system."""
- def path_mtime(self, path):
- """Return the modification time for the path."""
- return int(_os.stat(path).st_mtime)
+ def path_stats(self, path):
+ """Return the metadata for the path."""
+ st = _os.stat(path)
+ return {'mtime': st.st_mtime, 'size': st.st_size}
- def set_data(self, path, data):
+ def _cache_bytecode(self, source_path, bytecode_path, data):
+ # Adapt between the two APIs
+ try:
+ mode = _os.stat(source_path).st_mode
+ except OSError:
+ mode = 0o666
+ # We always ensure write access so we can update cached files
+ # later even when the source files are read-only on Windows (#6074)
+ mode |= 0o200
+ return self.set_data(bytecode_path, data, _mode=mode)
+
+ def set_data(self, path, data, *, _mode=0o666):
"""Write bytes data to a file."""
- parent, _, filename = path.rpartition(path_sep)
+ parent, filename = _path_split(path)
path_parts = []
# Figure out what directories are missing.
while parent and not _path_isdir(parent):
- parent, _, part = parent.rpartition(path_sep)
+ parent, part = _path_split(parent)
path_parts.append(part)
# Create needed directories.
for part in reversed(path_parts):
parent = _path_join(parent, part)
try:
_os.mkdir(parent)
- except OSError as exc:
+ except FileExistsError:
# Probably another Python process already created the dir.
- if exc.errno == errno.EEXIST:
- continue
- else:
- raise
- except IOError as exc:
- # If can't get proper access, then just forget about writing
- # the data.
- if exc.errno == errno.EACCES:
- return
- else:
- raise
- try:
- with _io.FileIO(path, 'wb') as file:
- file.write(data)
- except IOError as exc:
- # Don't worry if you can't write bytecode.
- if exc.errno == errno.EACCES:
+ continue
+ except OSError as exc:
+ # Could be a permission error, read-only filesystem: just forget
+ # about writing the data.
+ _verbose_message('could not create {!r}: {!r}', parent, exc)
return
- else:
- raise
+ try:
+ _write_atomic(path, data, _mode)
+ _verbose_message('created {!r}', path)
+ except OSError as exc:
+ # Same as above: just don't write the bytecode.
+ _verbose_message('could not create {!r}: {!r}', path, exc)
-class _SourcelessFileLoader(_FileLoader, _LoaderBasics):
+class SourcelessFileLoader(FileLoader, _LoaderBasics):
"""Loader which handles sourceless file imports."""
@@ -513,19 +1092,25 @@ class _SourcelessFileLoader(_FileLoader, _LoaderBasics):
def get_code(self, fullname):
path = self.get_filename(fullname)
data = self.get_data(path)
- bytes_data = self._bytes_from_bytecode(fullname, data, None)
+ bytes_data = self._bytes_from_bytecode(fullname, data, path, None)
found = marshal.loads(bytes_data)
- if isinstance(found, code_type):
+ if isinstance(found, _code_type):
+ _verbose_message('code object from {!r}', path)
return found
else:
- raise ImportError("Non-code object in {}".format(path))
+ raise ImportError("Non-code object in {}".format(path),
+ name=fullname, path=path)
def get_source(self, fullname):
"""Return None as there is no source code."""
return None
-class _ExtensionFileLoader:
+# Filled in by _setup().
+EXTENSION_SUFFIXES = []
+
+
+class ExtensionFileLoader:
"""Loader for extension modules.
@@ -534,14 +1119,8 @@ class _ExtensionFileLoader:
"""
def __init__(self, name, path):
- """Initialize the loader.
-
- If is_pkg is True then an exception is raised as extension modules
- cannot be the __init__ module for an extension module.
-
- """
- self._name = name
- self._path = path
+ self.name = name
+ self.path = path
@_check_name
@set_package
@@ -550,297 +1129,525 @@ class _ExtensionFileLoader:
"""Load an extension module."""
is_reload = fullname in sys.modules
try:
- return imp.load_dynamic(fullname, self._path)
+ module = _call_with_frames_removed(_imp.load_dynamic,
+ fullname, self.path)
+ _verbose_message('extension module loaded from {!r}', self.path)
+ if self.is_package(fullname) and not hasattr(module, '__path__'):
+ module.__path__ = [_path_split(self.path)[0]]
+ return module
except:
if not is_reload and fullname in sys.modules:
del sys.modules[fullname]
raise
- @_check_name
def is_package(self, fullname):
- """Return False as an extension module can never be a package."""
- return False
+ """Return True if the extension module is a package."""
+ file_name = _path_split(self.path)[1]
+ return any(file_name == '__init__' + suffix
+ for suffix in EXTENSION_SUFFIXES)
- @_check_name
def get_code(self, fullname):
"""Return None as an extension module cannot create a code object."""
return None
- @_check_name
def get_source(self, fullname):
"""Return None as extension modules have no source code."""
return None
+class _NamespacePath:
+ """Represents a namespace package's path. It uses the module name
+ to find its parent module, and from there it looks up the parent's
+ __path__. When this changes, the module's own path is recomputed,
+ using path_finder. For top-level modules, the parent module's path
+ is sys.path."""
+
+ def __init__(self, name, path, path_finder):
+ self._name = name
+ self._path = path
+ self._last_parent_path = tuple(self._get_parent_path())
+ self._path_finder = path_finder
+
+ def _find_parent_path_names(self):
+ """Returns a tuple of (parent-module-name, parent-path-attr-name)"""
+ parent, dot, me = self._name.rpartition('.')
+ if dot == '':
+ # This is a top-level module. sys.path contains the parent path.
+ return 'sys', 'path'
+ # Not a top-level module. parent-module.__path__ contains the
+ # parent path.
+ return parent, '__path__'
+
+ def _get_parent_path(self):
+ parent_module_name, path_attr_name = self._find_parent_path_names()
+ return getattr(sys.modules[parent_module_name], path_attr_name)
+
+ def _recalculate(self):
+ # If the parent's path has changed, recalculate _path
+ parent_path = tuple(self._get_parent_path()) # Make a copy
+ if parent_path != self._last_parent_path:
+ loader, new_path = self._path_finder(self._name, parent_path)
+ # Note that no changes are made if a loader is returned, but we
+ # do remember the new parent path
+ if loader is None:
+ self._path = new_path
+ self._last_parent_path = parent_path # Save the copy
+ return self._path
+
+ def __iter__(self):
+ return iter(self._recalculate())
+
+ def __len__(self):
+ return len(self._recalculate())
+
+ def __repr__(self):
+ return "_NamespacePath({!r})".format(self._path)
+
+ def __contains__(self, item):
+ return item in self._recalculate()
+
+ def append(self, item):
+ self._path.append(item)
+
+
+class NamespaceLoader:
+ def __init__(self, name, path, path_finder):
+ self._path = _NamespacePath(name, path, path_finder)
+
+ @classmethod
+ def module_repr(cls, module):
+ return "<module '{}' (namespace)>".format(module.__name__)
+
+ @module_for_loader
+ def load_module(self, module):
+ """Load a namespace module."""
+ _verbose_message('namespace module loaded with path {!r}', self._path)
+ module.__path__ = self._path
+ return module
+
+
# Finders #####################################################################
class PathFinder:
- """Meta path finder for sys.(path|path_hooks|path_importer_cache)."""
+ """Meta path finder for sys.path and package __path__ attributes."""
+
+ @classmethod
+ def invalidate_caches(cls):
+ """Call the invalidate_caches() method on all path entry finders
+ stored in sys.path_importer_cache (where implemented)."""
+ for finder in sys.path_importer_cache.values():
+ if hasattr(finder, 'invalidate_caches'):
+ finder.invalidate_caches()
@classmethod
- def _path_hooks(cls, path, hooks=None):
+ def _path_hooks(cls, path):
"""Search sequence of hooks for a finder for 'path'.
If 'hooks' is false then use sys.path_hooks.
"""
- if not hooks:
- hooks = sys.path_hooks
- for hook in hooks:
+ if not sys.path_hooks:
+ _warnings.warn('sys.path_hooks is empty', ImportWarning)
+ for hook in sys.path_hooks:
try:
return hook(path)
except ImportError:
continue
else:
- raise ImportError("no path hook found for {0}".format(path))
+ return None
@classmethod
- def _path_importer_cache(cls, path, default=None):
- """Get the finder for the path from sys.path_importer_cache.
-
- If the path is not in the cache, find the appropriate finder and cache
- it. If None is cached, get the default finder and cache that
- (if applicable).
+ def _path_importer_cache(cls, path):
+ """Get the finder for the path entry from sys.path_importer_cache.
- Because of NullImporter, some finder should be returned. The only
- explicit fail case is if None is cached but the path cannot be used for
- the default hook, for which ImportError is raised.
+ If the path entry is not in the cache, find the appropriate finder
+ and cache it. If no finder is available, store None.
"""
+ if path == '':
+ path = '.'
try:
finder = sys.path_importer_cache[path]
except KeyError:
finder = cls._path_hooks(path)
sys.path_importer_cache[path] = finder
- else:
- if finder is None and default:
- # Raises ImportError on failure.
- finder = default(path)
- sys.path_importer_cache[path] = finder
return finder
@classmethod
+ def _get_loader(cls, fullname, path):
+ """Find the loader or namespace_path for this module/package name."""
+ # If this ends up being a namespace package, namespace_path is
+ # the list of paths that will become its __path__
+ namespace_path = []
+ for entry in path:
+ finder = cls._path_importer_cache(entry)
+ if finder is not None:
+ if hasattr(finder, 'find_loader'):
+ loader, portions = finder.find_loader(fullname)
+ else:
+ loader = finder.find_module(fullname)
+ portions = []
+ if loader is not None:
+ # We found a loader: return it immediately.
+ return loader, namespace_path
+ # This is possibly part of a namespace package.
+ # Remember these path entries (if any) for when we
+ # create a namespace package, and continue iterating
+ # on path.
+ namespace_path.extend(portions)
+ else:
+ return None, namespace_path
+
+ @classmethod
def find_module(cls, fullname, path=None):
"""Find the module on sys.path or 'path' based on sys.path_hooks and
sys.path_importer_cache."""
- if not path:
+ if path is None:
path = sys.path
- for entry in path:
- try:
- finder = cls._path_importer_cache(entry)
- except ImportError:
- continue
- if finder:
- loader = finder.find_module(fullname)
- if loader:
- return loader
+ loader, namespace_path = cls._get_loader(fullname, path)
+ if loader is not None:
+ return loader
else:
- return None
+ if namespace_path:
+ # We found at least one namespace path. Return a
+ # loader which can create the namespace package.
+ return NamespaceLoader(fullname, namespace_path, cls._get_loader)
+ else:
+ return None
-class _FileFinder:
+class FileFinder:
"""File-based finder.
- Constructor takes a list of objects detailing what file extensions their
- loader supports along with whether it can be used for a package.
+ Interactions with the file system are cached for performance, being
+ refreshed when the directory the finder is handling has been modified.
"""
def __init__(self, path, *details):
- """Initialize with finder details."""
- packages = []
- modules = []
- for detail in details:
- modules.extend((suffix, detail.loader) for suffix in detail.suffixes)
- if detail.supports_packages:
- packages.extend((suffix, detail.loader)
- for suffix in detail.suffixes)
- self.packages = packages
- self.modules = modules
- self.path = path
-
- def find_module(self, fullname):
- """Try to find a loader for the specified module."""
+ """Initialize with the path to search on and a variable number of
+ 2-tuples containing the loader and the file suffixes the loader
+ recognizes."""
+ loaders = []
+ for loader, suffixes in details:
+ loaders.extend((suffix, loader) for suffix in suffixes)
+ self._loaders = loaders
+ # Base (directory) path
+ self.path = path or '.'
+ self._path_mtime = -1
+ self._path_cache = set()
+ self._relaxed_path_cache = set()
+
+ def invalidate_caches(self):
+ """Invalidate the directory mtime."""
+ self._path_mtime = -1
+
+ find_module = _find_module_shim
+
+ def find_loader(self, fullname):
+ """Try to find a loader for the specified module, or the namespace
+ package portions. Returns (loader, list-of-portions)."""
+ is_namespace = False
tail_module = fullname.rpartition('.')[2]
- base_path = _path_join(self.path, tail_module)
- if _path_isdir(base_path) and _case_ok(self.path, tail_module):
- for suffix, loader in self.packages:
- init_filename = '__init__' + suffix
- full_path = _path_join(base_path, init_filename)
- if (_path_isfile(full_path) and
- _case_ok(base_path, init_filename)):
- return loader(fullname, full_path)
- else:
- msg = "Not importing directory {}: missing __init__"
- _warnings.warn(msg.format(base_path), ImportWarning)
- for suffix, loader in self.modules:
- mod_filename = tail_module + suffix
- full_path = _path_join(self.path, mod_filename)
- if _path_isfile(full_path) and _case_ok(self.path, mod_filename):
- return loader(fullname, full_path)
- return None
-
-class _SourceFinderDetails:
-
- loader = _SourceFileLoader
- supports_packages = True
-
- def __init__(self):
- self.suffixes = _suffix_list(imp.PY_SOURCE)
-
-class _SourcelessFinderDetails:
-
- loader = _SourcelessFileLoader
- supports_packages = True
+ try:
+ mtime = _os.stat(self.path).st_mtime
+ except OSError:
+ mtime = -1
+ if mtime != self._path_mtime:
+ self._fill_cache()
+ self._path_mtime = mtime
+ # tail_module keeps the original casing, for __file__ and friends
+ if _relax_case():
+ cache = self._relaxed_path_cache
+ cache_module = tail_module.lower()
+ else:
+ cache = self._path_cache
+ cache_module = tail_module
+ # Check if the module is the name of a directory (and thus a package).
+ if cache_module in cache:
+ base_path = _path_join(self.path, tail_module)
+ if _path_isdir(base_path):
+ for suffix, loader in self._loaders:
+ init_filename = '__init__' + suffix
+ full_path = _path_join(base_path, init_filename)
+ if _path_isfile(full_path):
+ return (loader(fullname, full_path), [base_path])
+ else:
+ # A namespace package, return the path if we don't also
+ # find a module in the next section.
+ is_namespace = True
+ # Check whether a file w/ a proper suffix exists.
+ for suffix, loader in self._loaders:
+ if cache_module + suffix in cache:
+ full_path = _path_join(self.path, tail_module + suffix)
+ if _path_isfile(full_path):
+ return (loader(fullname, full_path), [])
+ if is_namespace:
+ return (None, [base_path])
+ return (None, [])
+
+ def _fill_cache(self):
+ """Fill the cache of potential modules and packages for this directory."""
+ path = self.path
+ try:
+ contents = _os.listdir(path)
+ except FileNotFoundError:
+ # Directory has been removed since last import
+ contents = []
+ # We store two cached versions, to handle runtime changes of the
+ # PYTHONCASEOK environment variable.
+ if not sys.platform.startswith('win'):
+ self._path_cache = set(contents)
+ else:
+ # Windows users can import modules with case-insensitive file
+ # suffixes (for legacy reasons). Make the suffix lowercase here
+ # so it's done once instead of for every import. This is safe as
+ # the specified suffixes to check against are always specified in a
+ # case-sensitive manner.
+ lower_suffix_contents = set()
+ for item in contents:
+ name, dot, suffix = item.partition('.')
+ if dot:
+ new_name = '{}.{}'.format(name, suffix.lower())
+ else:
+ new_name = name
+ lower_suffix_contents.add(new_name)
+ self._path_cache = lower_suffix_contents
+ if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS):
+ self._relaxed_path_cache = set(fn.lower() for fn in contents)
- def __init__(self):
- self.suffixes = _suffix_list(imp.PY_COMPILED)
+ @classmethod
+ def path_hook(cls, *loader_details):
+ """A class method which returns a closure to use on sys.path_hook
+ which will return an instance using the specified loaders and the path
+ called on the closure.
+ If the path called on the closure is not a directory, ImportError is
+ raised.
-class _ExtensionFinderDetails:
+ """
+ def path_hook_for_FileFinder(path):
+ """Path hook for importlib.machinery.FileFinder."""
+ if not _path_isdir(path):
+ raise ImportError("only directories are supported", path=path)
+ return cls(path, *loader_details)
- loader = _ExtensionFileLoader
- supports_packages = False
+ return path_hook_for_FileFinder
- def __init__(self):
- self.suffixes = _suffix_list(imp.C_EXTENSION)
+ def __repr__(self):
+ return "FileFinder(%r)" % (self.path,)
# Import itself ###############################################################
-def _file_path_hook(path):
- """If the path is a directory, return a file-based finder."""
- if _path_isdir(path):
- return _FileFinder(path, _ExtensionFinderDetails(),
- _SourceFinderDetails(),
- _SourcelessFinderDetails())
- else:
- raise ImportError("only directories are supported")
+class _ImportLockContext:
+ """Context manager for the import lock."""
-_DEFAULT_PATH_HOOK = _file_path_hook
+ def __enter__(self):
+ """Acquire the import lock."""
+ _imp.acquire_lock()
-class _DefaultPathFinder(PathFinder):
+ def __exit__(self, exc_type, exc_value, exc_traceback):
+ """Release the import lock regardless of any raised exceptions."""
+ _imp.release_lock()
- """Subclass of PathFinder that implements implicit semantics for
- __import__."""
- @classmethod
- def _path_hooks(cls, path):
- """Search sys.path_hooks as well as implicit path hooks."""
- try:
- return super()._path_hooks(path)
- except ImportError:
- implicit_hooks = [_DEFAULT_PATH_HOOK, imp.NullImporter]
- return super()._path_hooks(path, implicit_hooks)
+def _resolve_name(name, package, level):
+ """Resolve a relative module name to an absolute one."""
+ bits = package.rsplit('.', level - 1)
+ if len(bits) < level:
+ raise ValueError('attempted relative import beyond top-level package')
+ base = bits[0]
+ return '{}.{}'.format(base, name) if name else base
- @classmethod
- def _path_importer_cache(cls, path):
- """Use the default path hook when None is stored in
- sys.path_importer_cache."""
- return super()._path_importer_cache(path, _DEFAULT_PATH_HOOK)
+def _find_module(name, path):
+ """Find a module's loader."""
+ if not sys.meta_path:
+ _warnings.warn('sys.meta_path is empty', ImportWarning)
+ for finder in sys.meta_path:
+ with _ImportLockContext():
+ loader = finder.find_module(name, path)
+ if loader is not None:
+ # The parent import may have already imported this module.
+ if name not in sys.modules:
+ return loader
+ else:
+ return sys.modules[name].__loader__
+ else:
+ return None
-class _ImportLockContext:
- """Context manager for the import lock."""
+def _sanity_check(name, package, level):
+ """Verify arguments are "sane"."""
+ if not isinstance(name, str):
+ raise TypeError("module name must be str, not {}".format(type(name)))
+ if level < 0:
+ raise ValueError('level must be >= 0')
+ if package:
+ if not isinstance(package, str):
+ raise TypeError("__package__ not set to a string")
+ elif package not in sys.modules:
+ msg = ("Parent module {!r} not loaded, cannot perform relative "
+ "import")
+ raise SystemError(msg.format(package))
+ if not name and level == 0:
+ raise ValueError("Empty module name")
- def __enter__(self):
- """Acquire the import lock."""
- imp.acquire_lock()
- def __exit__(self, exc_type, exc_value, exc_traceback):
- """Release the import lock regardless of any raised exceptions."""
- imp.release_lock()
+_ERR_MSG = 'No module named {!r}'
+def _find_and_load_unlocked(name, import_):
+ path = None
+ parent = name.rpartition('.')[0]
+ if parent:
+ if parent not in sys.modules:
+ _call_with_frames_removed(import_, parent)
+ # Crazy side-effects!
+ if name in sys.modules:
+ return sys.modules[name]
+ # Backwards-compatibility; be nicer to skip the dict lookup.
+ parent_module = sys.modules[parent]
+ try:
+ path = parent_module.__path__
+ except AttributeError:
+ msg = (_ERR_MSG + '; {} is not a package').format(name, parent)
+ raise ImportError(msg, name=name)
+ loader = _find_module(name, path)
+ if loader is None:
+ exc = ImportError(_ERR_MSG.format(name), name=name)
+ # TODO(brett): switch to a proper ModuleNotFound exception in Python
+ # 3.4.
+ exc._not_found = True
+ raise exc
+ elif name not in sys.modules:
+ # The parent import may have already imported this module.
+ loader.load_module(name)
+ _verbose_message('import {!r} # {!r}', name, loader)
+ # Backwards-compatibility; be nicer to skip the dict lookup.
+ module = sys.modules[name]
+ if parent:
+ # Set the module as an attribute on its parent.
+ parent_module = sys.modules[parent]
+ setattr(parent_module, name.rpartition('.')[2], module)
+ # Set __package__ if the loader did not.
+ if getattr(module, '__package__', None) is None:
+ try:
+ module.__package__ = module.__name__
+ if not hasattr(module, '__path__'):
+ module.__package__ = module.__package__.rpartition('.')[0]
+ except AttributeError:
+ pass
+ # Set loader if need be.
+ if not hasattr(module, '__loader__'):
+ try:
+ module.__loader__ = loader
+ except AttributeError:
+ pass
+ return module
-_IMPLICIT_META_PATH = [BuiltinImporter, FrozenImporter, _DefaultPathFinder]
-_ERR_MSG = 'No module named {}'
+def _find_and_load(name, import_):
+ """Find and load the module, and release the import lock."""
+ try:
+ lock = _get_module_lock(name)
+ finally:
+ _imp.release_lock()
+ lock.acquire()
+ try:
+ return _find_and_load_unlocked(name, import_)
+ finally:
+ lock.release()
+
def _gcd_import(name, package=None, level=0):
"""Import and return the module based on its name, the package the call is
being made from, and the level adjustment.
This function represents the greatest common denominator of functionality
- between import_module and __import__. This includes settting __package__ if
+ between import_module and __import__. This includes setting __package__ if
the loader did not.
"""
- if package:
- if not hasattr(package, 'rindex'):
- raise ValueError("__package__ not set to a string")
- elif package not in sys.modules:
- msg = ("Parent module {0!r} not loaded, cannot perform relative "
- "import")
- raise SystemError(msg.format(package))
- if not name and level == 0:
- raise ValueError("Empty module name")
+ _sanity_check(name, package, level)
if level > 0:
- dot = len(package)
- for x in range(level, 1, -1):
- try:
- dot = package.rindex('.', 0, dot)
- except ValueError:
- raise ValueError("attempted relative import beyond "
- "top-level package")
- if name:
- name = "{0}.{1}".format(package[:dot], name)
- else:
- name = package[:dot]
- with _ImportLockContext():
- try:
- module = sys.modules[name]
- if module is None:
- message = ("import of {} halted; "
- "None in sys.modules".format(name))
- raise ImportError(message)
- return module
- except KeyError:
- pass
- parent = name.rpartition('.')[0]
- path = None
- if parent:
- if parent not in sys.modules:
- _gcd_import(parent)
- # Backwards-compatibility; be nicer to skip the dict lookup.
- parent_module = sys.modules[parent]
- try:
- path = parent_module.__path__
- except AttributeError:
- msg = (_ERR_MSG + '; {} is not a package').format(name, parent)
- raise ImportError(msg)
- meta_path = sys.meta_path + _IMPLICIT_META_PATH
- for finder in meta_path:
- loader = finder.find_module(name, path)
- if loader is not None:
- # The parent import may have already imported this module.
- if name not in sys.modules:
- loader.load_module(name)
- break
- else:
- raise ImportError(_ERR_MSG.format(name))
- # Backwards-compatibility; be nicer to skip the dict lookup.
- module = sys.modules[name]
- if parent:
- # Set the module as an attribute on its parent.
- setattr(parent_module, name.rpartition('.')[2], module)
- # Set __package__ if the loader did not.
- if not hasattr(module, '__package__') or module.__package__ is None:
- # Watch out for what comes out of sys.modules to not be a module,
- # e.g. an int.
- try:
- module.__package__ = module.__name__
- if not hasattr(module, '__path__'):
- module.__package__ = module.__package__.rpartition('.')[0]
- except AttributeError:
- pass
- return module
+ name = _resolve_name(name, package, level)
+ _imp.acquire_lock()
+ if name not in sys.modules:
+ return _find_and_load(name, _gcd_import)
+ module = sys.modules[name]
+ if module is None:
+ _imp.release_lock()
+ message = ("import of {} halted; "
+ "None in sys.modules".format(name))
+ raise ImportError(message, name=name)
+ _lock_unlock_module(name)
+ return module
+
+def _handle_fromlist(module, fromlist, import_):
+ """Figure out what __import__ should return.
+
+ The import_ parameter is a callable which takes the name of module to
+ import. It is required to decouple the function from assuming importlib's
+ import implementation is desired.
+
+ """
+ # The hell that is fromlist ...
+ # If a package was imported, try to import stuff from fromlist.
+ if hasattr(module, '__path__'):
+ if '*' in fromlist:
+ fromlist = list(fromlist)
+ fromlist.remove('*')
+ if hasattr(module, '__all__'):
+ fromlist.extend(module.__all__)
+ for x in fromlist:
+ if not hasattr(module, x):
+ from_name = '{}.{}'.format(module.__name__, x)
+ try:
+ _call_with_frames_removed(import_, from_name)
+ except ImportError as exc:
+ # Backwards-compatibility dictates we ignore failed
+ # imports triggered by fromlist for modules that don't
+ # exist.
+ # TODO(brett): In Python 3.4, have import raise
+ # ModuleNotFound and catch that.
+ if getattr(exc, '_not_found', False):
+ if exc.name == from_name:
+ continue
+ raise
+ return module
+
+
+def _calc___package__(globals):
+ """Calculate what __package__ should be.
+
+ __package__ is not guaranteed to be defined or could be set to None
+ to represent that its proper value is unknown.
+
+ """
+ package = globals.get('__package__')
+ if package is None:
+ package = globals['__name__']
+ if '__path__' not in globals:
+ package = package.rpartition('.')[0]
+ return package
-def __import__(name, globals={}, locals={}, fromlist=[], level=0):
+def _get_supported_file_loaders():
+ """Returns a list of file-based module loaders.
+
+ Each item is a tuple (loader, suffixes).
+ """
+ extensions = ExtensionFileLoader, _imp.extension_suffixes()
+ source = SourceFileLoader, SOURCE_SUFFIXES
+ bytecode = SourcelessFileLoader, BYTECODE_SUFFIXES
+ return [extensions, source, bytecode]
+
+
+def __import__(name, globals=None, locals=None, fromlist=(), level=0):
"""Import a module.
The 'globals' argument is used to infer where the import is occuring from
@@ -851,40 +1658,112 @@ def __import__(name, globals={}, locals={}, fromlist=[], level=0):
import (e.g. ``from ..pkg import mod`` would have a 'level' of 2).
"""
- if not hasattr(name, 'rpartition'):
- raise TypeError("module name must be str, not {}".format(type(name)))
if level == 0:
module = _gcd_import(name)
else:
- # __package__ is not guaranteed to be defined or could be set to None
- # to represent that it's proper value is unknown
- package = globals.get('__package__')
- if package is None:
- package = globals['__name__']
- if '__path__' not in globals:
- package = package.rpartition('.')[0]
+ globals_ = globals if globals is not None else {}
+ package = _calc___package__(globals_)
module = _gcd_import(name, package, level)
- # The hell that is fromlist ...
if not fromlist:
# Return up to the first dot in 'name'. This is complicated by the fact
# that 'name' may be relative.
if level == 0:
- return sys.modules[name.partition('.')[0]]
+ return _gcd_import(name.partition('.')[0])
elif not name:
return module
else:
+ # Figure out where to slice the module's name up to the first dot
+ # in 'name'.
cut_off = len(name) - len(name.partition('.')[0])
- return sys.modules[module.__name__[:-cut_off]]
+ # Slice end needs to be positive to alleviate need to special-case
+ # when ``'.' not in name``.
+ return sys.modules[module.__name__[:len(module.__name__)-cut_off]]
else:
- # If a package was imported, try to import stuff from fromlist.
- if hasattr(module, '__path__'):
- if '*' in fromlist and hasattr(module, '__all__'):
- fromlist = list(fromlist)
- fromlist.remove('*')
- fromlist.extend(module.__all__)
- for x in (y for y in fromlist if not hasattr(module,y)):
- try:
- _gcd_import('{0}.{1}'.format(module.__name__, x))
- except ImportError:
- pass
- return module
+ return _handle_fromlist(module, fromlist, _gcd_import)
+
+
+
+def _setup(sys_module, _imp_module):
+ """Set up importlib by importing needed built-in modules and injecting them
+ into the global namespace.
+
+ As sys is needed for sys.modules access and _imp is needed to load built-in
+ modules, those two modules must be explicitly passed in.
+
+ The two arguments are stored in the module globals sys and _imp; everything
+ else is attached to this module's own entry in sys.modules with setattr().
+
+ """
+ global _imp, sys, BYTECODE_SUFFIXES
+ _imp = _imp_module
+ sys = sys_module
+
+ # Choose the bytecode suffix list from the interpreter's optimize flag.
+ if sys.flags.optimize:
+ BYTECODE_SUFFIXES = OPTIMIZED_BYTECODE_SUFFIXES
+ else:
+ BYTECODE_SUFFIXES = DEBUG_BYTECODE_SUFFIXES
+
+ # Give the two passed-in modules a __loader__ if they lack one.
+ for module in (_imp, sys):
+ if not hasattr(module, '__loader__'):
+ module.__loader__ = BuiltinImporter
+
+ self_module = sys.modules[__name__]
+ # Reuse a module that is already in sys.modules, otherwise load it through
+ # BuiltinImporter; either way expose it under its own name.
+ for builtin_name in ('_io', '_warnings', 'builtins', 'marshal'):
+ if builtin_name not in sys.modules:
+ builtin_module = BuiltinImporter.load_module(builtin_name)
+ else:
+ builtin_module = sys.modules[builtin_name]
+ setattr(self_module, builtin_name, builtin_module)
+
+ # Candidate OS modules with their path separators, tried in this order.
+ os_details = ('posix', ['/']), ('nt', ['\\', '/']), ('os2', ['\\', '/'])
+ for builtin_os, path_separators in os_details:
+ # Assumption made in _path_join()
+ assert all(len(sep) == 1 for sep in path_separators)
+ # The first separator listed is the default one.
+ path_sep = path_separators[0]
+ if builtin_os in sys.modules:
+ os_module = sys.modules[builtin_os]
+ break
+ else:
+ try:
+ os_module = BuiltinImporter.load_module(builtin_os)
+ # TODO: rip out os2 code after 3.3 is released as per PEP 11
+ if builtin_os == 'os2' and 'EMX GCC' in sys.version:
+ path_sep = path_separators[1]
+ break
+ except ImportError:
+ continue
+ # NOTE(review): indentation is not visible in this rendering; this else
+ # presumably pairs with the for loop (no candidate could be used) - confirm.
+ else:
+ raise ImportError('importlib requires posix or nt')
+
+ try:
+ thread_module = BuiltinImporter.load_module('_thread')
+ except ImportError:
+ # Python was built without threads
+ thread_module = None
+ weakref_module = BuiltinImporter.load_module('_weakref')
+
+ # winreg is loaded only when the OS module that was picked is 'nt'.
+ if builtin_os == 'nt':
+ winreg_module = BuiltinImporter.load_module('winreg')
+ setattr(self_module, '_winreg', winreg_module)
+
+ setattr(self_module, '_os', os_module)
+ setattr(self_module, '_thread', thread_module)
+ setattr(self_module, '_weakref', weakref_module)
+ setattr(self_module, 'path_sep', path_sep)
+ setattr(self_module, 'path_separators', set(path_separators))
+ # Constants
+ setattr(self_module, '_relax_case', _make_relax_case())
+ # The module-level suffix lists are extended in place.
+ EXTENSION_SUFFIXES.extend(_imp.extension_suffixes())
+ if builtin_os == 'nt':
+ SOURCE_SUFFIXES.append('.pyw')
+ # A '_d.pyd' extension suffix is taken as the sign of a debug build.
+ if '_d.pyd' in EXTENSION_SUFFIXES:
+ WindowsRegistryFinder.DEBUG_BUILD = True
+
+
+def _install(sys_module, _imp_module):
+ """Install importlib as the implementation of import.
+
+ Runs _setup() with the two modules, then adds a FileFinder path hook to
+ sys.path_hooks and appends the finder classes to sys.meta_path.
+ """
+ _setup(sys_module, _imp_module)
+ # One path hook built from all supported (loader, suffixes) pairs.
+ supported_loaders = _get_supported_file_loaders()
+ sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)])
+ # Finders are appended in the order written below.
+ sys.meta_path.append(BuiltinImporter)
+ sys.meta_path.append(FrozenImporter)
+ # The registry finder is only considered when the OS module is 'nt'.
+ if _os.__name__ == 'nt':
+ sys.meta_path.append(WindowsRegistryFinder)
+ sys.meta_path.append(PathFinder)
diff --git a/Lib/importlib/abc.py b/Lib/importlib/abc.py
index fa343f8..387567a 100644
--- a/Lib/importlib/abc.py
+++ b/Lib/importlib/abc.py
@@ -1,44 +1,109 @@
"""Abstract base classes related to import."""
from . import _bootstrap
from . import machinery
-from . import util
+try:
+ import _frozen_importlib
+except ImportError as exc:
+ if exc.name != '_frozen_importlib':
+ raise
+ _frozen_importlib = None
import abc
import imp
-import io
import marshal
-import os.path
import sys
import tokenize
-import types
import warnings
-class Loader(metaclass=abc.ABCMeta):
+def _register(abstract_cls, *classes):
+ """Register every class in classes with abstract_cls.
+
+ When _frozen_importlib is not None, the attribute of the same name found
+ on it is registered with abstract_cls as well.
+ """
+ for cls in classes:
+ abstract_cls.register(cls)
+ if _frozen_importlib is not None:
+ # Look the counterpart up by class name; getattr() has no default
+ # here, so a missing name raises AttributeError.
+ frozen_cls = getattr(_frozen_importlib, cls.__name__)
+ abstract_cls.register(frozen_cls)
- """Abstract base class for import loaders."""
+
+class Finder(metaclass=abc.ABCMeta):
+
+ """Legacy abstract base class for import finders.
+
+ It may be subclassed for compatibility with legacy third party
+ reimplementations of the import system. Otherwise, finder
+ implementations should derive from the more specific MetaPathFinder
+ or PathEntryFinder ABCs.
+ """
@abc.abstractmethod
- def load_module(self, fullname):
- """Abstract method which when implemented should load a module.
- The fullname is a str."""
+ def find_module(self, fullname, path=None):
+ """An abstract method that should find a module.
+ The fullname is a str and the optional path is a str or None.
+ Returns a Loader object.
+ """
raise NotImplementedError
-class Finder(metaclass=abc.ABCMeta):
+class MetaPathFinder(Finder):
- """Abstract base class for import finders."""
+ """Abstract base class for import finders on sys.meta_path."""
@abc.abstractmethod
- def find_module(self, fullname, path=None):
- """Abstract method which when implemented should find a module.
- The fullname is a str and the optional path is a str or None.
+ def find_module(self, fullname, path):
+ """Abstract method which, when implemented, should find a module.
+ The fullname is a str and the path is a str or None.
Returns a Loader object.
"""
raise NotImplementedError
-Finder.register(machinery.BuiltinImporter)
-Finder.register(machinery.FrozenImporter)
-Finder.register(machinery.PathFinder)
+ def invalidate_caches(self):
+ """An optional method for clearing the finder's cache, if any.
+ This method is used by importlib.invalidate_caches().
+ """
+ return NotImplemented
+
+_register(MetaPathFinder, machinery.BuiltinImporter, machinery.FrozenImporter,
+ machinery.PathFinder, machinery.WindowsRegistryFinder)
+
+
+class PathEntryFinder(Finder):
+
+ """Abstract base class for path entry finders used by PathFinder.
+
+ find_loader() is the abstract method; find_module is bound to a shim
+ taken from _bootstrap.
+ """
+
+ @abc.abstractmethod
+ def find_loader(self, fullname):
+ """Abstract method which, when implemented, returns a module loader.
+ The fullname is a str. Returns a 2-tuple of (Loader, portion) where
+ portion is a sequence of file system locations contributing to part of
+ a namespace package. The sequence may be empty and the loader may be
+ None.
+ """
+ raise NotImplementedError
+
+ # find_module is not written out here; the _bootstrap shim is reused.
+ find_module = _bootstrap._find_module_shim
+
+ def invalidate_caches(self):
+ """An optional method for clearing the finder's cache, if any.
+ This method is used by PathFinder.invalidate_caches().
+ """
+ # The default clears nothing and returns the NotImplemented singleton.
+ return NotImplemented
+
+_register(PathEntryFinder, machinery.FileFinder)
+
+
+class Loader(metaclass=abc.ABCMeta):
+
+ """Abstract base class for import loaders.
+
+ Both load_module() and module_repr() are declared abstract.
+ """
+
+ @abc.abstractmethod
+ def load_module(self, fullname):
+ """Abstract method which, when implemented, should load a module.
+ The fullname is a str."""
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def module_repr(self, module):
+ """Abstract method which, when implemented, calculates and returns the
+ given module's repr."""
+ raise NotImplementedError
class ResourceLoader(Loader):
@@ -84,8 +149,8 @@ class InspectLoader(Loader):
module. The fullname is a str. Returns a str."""
raise NotImplementedError
-InspectLoader.register(machinery.BuiltinImporter)
-InspectLoader.register(machinery.FrozenImporter)
+_register(InspectLoader, machinery.BuiltinImporter, machinery.FrozenImporter,
+ machinery.ExtensionFileLoader)
class ExecutionLoader(InspectLoader):
@@ -104,6 +169,15 @@ class ExecutionLoader(InspectLoader):
raise NotImplementedError
+class FileLoader(_bootstrap.FileLoader, ResourceLoader, ExecutionLoader):
+
+ """Abstract base class partially implementing the ResourceLoader and
+ ExecutionLoader ABCs.
+
+ The class body holds nothing but this docstring; all behaviour is
+ inherited from the three base classes."""
+
+_register(FileLoader, machinery.SourceFileLoader,
+ machinery.SourcelessFileLoader)
+
+
class SourceLoader(_bootstrap.SourceLoader, ResourceLoader, ExecutionLoader):
"""Abstract base class for loading source code (and optionally any
@@ -123,7 +197,20 @@ class SourceLoader(_bootstrap.SourceLoader, ResourceLoader, ExecutionLoader):
def path_mtime(self, path):
"""Return the (int) modification time for the path (str)."""
- raise NotImplementedError
+ if self.path_stats.__func__ is SourceLoader.path_stats:
+ raise NotImplementedError
+ return int(self.path_stats(path)['mtime'])
+
+ def path_stats(self, path):
+ """Return a metadata dict for the source pointed to by the path (str).
+ Possible keys:
+ - 'mtime' (mandatory) is the numeric timestamp of last source
+ code modification;
+ - 'size' (optional) is the size in bytes of the source code.
+
+ This default only ever fills in 'mtime', taken from path_mtime().
+ Raises NotImplementedError when path_mtime is still SourceLoader's own.
+ """
+ # path_mtime() delegates to path_stats() in the same way, so if it has
+ # not been overridden there is nothing to delegate to.
+ if self.path_mtime.__func__ is SourceLoader.path_mtime:
+ raise NotImplementedError
+ return {'mtime': self.path_mtime(path)}
def set_data(self, path, data):
"""Write the bytes to the path (if possible).
@@ -137,6 +224,7 @@ class SourceLoader(_bootstrap.SourceLoader, ResourceLoader, ExecutionLoader):
"""
raise NotImplementedError
+_register(SourceLoader, machinery.SourceFileLoader)
class PyLoader(SourceLoader):
@@ -195,10 +283,10 @@ class PyLoader(SourceLoader):
"use SourceLoader instead. "
"See the importlib documentation on how to be "
"compatible with Python 3.1 onwards.",
- PendingDeprecationWarning)
+ DeprecationWarning)
path = self.source_path(fullname)
if path is None:
- raise ImportError
+ raise ImportError(name=fullname)
else:
return path
@@ -226,7 +314,7 @@ class PyPycLoader(PyLoader):
if path is not None:
return path
raise ImportError("no source or bytecode path available for "
- "{0!r}".format(fullname))
+ "{0!r}".format(fullname), name=fullname)
def get_code(self, fullname):
"""Get a code object from source or bytecode."""
@@ -234,7 +322,7 @@ class PyPycLoader(PyLoader):
"removal in Python 3.4; use SourceLoader instead. "
"If Python 3.1 compatibility is required, see the "
"latest documentation for PyLoader.",
- PendingDeprecationWarning)
+ DeprecationWarning)
source_timestamp = self.source_mtime(fullname)
# Try to use bytecode if it is available.
bytecode_path = self.bytecode_path(fullname)
@@ -243,20 +331,30 @@ class PyPycLoader(PyLoader):
try:
magic = data[:4]
if len(magic) < 4:
- raise ImportError("bad magic number in {}".format(fullname))
+ raise ImportError(
+ "bad magic number in {}".format(fullname),
+ name=fullname, path=bytecode_path)
raw_timestamp = data[4:8]
if len(raw_timestamp) < 4:
raise EOFError("bad timestamp in {}".format(fullname))
- pyc_timestamp = marshal._r_long(raw_timestamp)
- bytecode = data[8:]
+ pyc_timestamp = _bootstrap._r_long(raw_timestamp)
+ raw_source_size = data[8:12]
+ if len(raw_source_size) != 4:
+ raise EOFError("bad file size in {}".format(fullname))
+ # Source size is unused as the ABC does not provide a way to
+ # get the size of the source ahead of reading it.
+ bytecode = data[12:]
# Verify that the magic number is valid.
if imp.get_magic() != magic:
- raise ImportError("bad magic number in {}".format(fullname))
+ raise ImportError(
+ "bad magic number in {}".format(fullname),
+ name=fullname, path=bytecode_path)
# Verify that the bytecode is not stale (only matters when
# there is source to fall back on).
if source_timestamp:
if pyc_timestamp < source_timestamp:
- raise ImportError("bytecode is stale")
+ raise ImportError("bytecode is stale", name=fullname,
+ path=bytecode_path)
except (ImportError, EOFError):
# If source is available give it a shot.
if source_timestamp is not None:
@@ -268,18 +366,20 @@ class PyPycLoader(PyLoader):
return marshal.loads(bytecode)
elif source_timestamp is None:
raise ImportError("no source or bytecode available to create code "
- "object for {0!r}".format(fullname))
+ "object for {0!r}".format(fullname),
+ name=fullname)
# Use the source.
source_path = self.source_path(fullname)
if source_path is None:
message = "a source path must exist to load {0}".format(fullname)
- raise ImportError(message)
+ raise ImportError(message, name=fullname)
source = self.get_data(source_path)
code_object = compile(source, source_path, 'exec', dont_inherit=True)
# Generate bytecode and write it out.
if not sys.dont_write_bytecode:
data = bytearray(imp.get_magic())
- data.extend(marshal._w_long(source_timestamp))
+ data.extend(_bootstrap._w_long(source_timestamp))
+ data.extend(_bootstrap._w_long(len(source) & 0xFFFFFFFF))
data.extend(marshal.dumps(code_object))
self.write_bytecode(fullname, data)
return code_object
diff --git a/Lib/importlib/machinery.py b/Lib/importlib/machinery.py
index 5197744..ff826e4 100644
--- a/Lib/importlib/machinery.py
+++ b/Lib/importlib/machinery.py
@@ -1,5 +1,20 @@
"""The machinery of importlib: finders, loaders, hooks, etc."""
+import _imp
+
+from ._bootstrap import (SOURCE_SUFFIXES, DEBUG_BYTECODE_SUFFIXES,
+ OPTIMIZED_BYTECODE_SUFFIXES, BYTECODE_SUFFIXES,
+ EXTENSION_SUFFIXES)
from ._bootstrap import BuiltinImporter
from ._bootstrap import FrozenImporter
+from ._bootstrap import WindowsRegistryFinder
from ._bootstrap import PathFinder
+from ._bootstrap import FileFinder
+from ._bootstrap import SourceFileLoader
+from ._bootstrap import SourcelessFileLoader
+from ._bootstrap import ExtensionFileLoader
+
+
+def all_suffixes():
+ """Returns a list of all recognized module suffixes for this process.
+
+ The result is a new list: source suffixes first, then bytecode suffixes,
+ then extension suffixes.
+ """
+ return SOURCE_SUFFIXES + BYTECODE_SUFFIXES + EXTENSION_SUFFIXES
diff --git a/Lib/importlib/test/__main__.py b/Lib/importlib/test/__main__.py
deleted file mode 100644
index decc53d..0000000
--- a/Lib/importlib/test/__main__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""Run importlib's test suite.
-
-Specifying the ``--builtin`` flag will run tests, where applicable, with
-builtins.__import__ instead of importlib.__import__.
-
-"""
-import importlib
-from importlib.test.import_ import util
-import os.path
-from test.support import run_unittest
-import sys
-import unittest
-
-
-def test_main():
- if '__pycache__' in __file__:
- parts = __file__.split(os.path.sep)
- start_dir = sep.join(parts[:-2])
- else:
- start_dir = os.path.dirname(__file__)
- top_dir = os.path.dirname(os.path.dirname(start_dir))
- test_loader = unittest.TestLoader()
- if '--builtin' in sys.argv:
- util.using___import__ = True
- run_unittest(test_loader.discover(start_dir, top_level_dir=top_dir))
-
-
-if __name__ == '__main__':
- test_main()
diff --git a/Lib/importlib/test/benchmark.py b/Lib/importlib/test/benchmark.py
deleted file mode 100644
index b5de6c6..0000000
--- a/Lib/importlib/test/benchmark.py
+++ /dev/null
@@ -1,172 +0,0 @@
-"""Benchmark some basic import use-cases.
-
-The assumption is made that this benchmark is run in a fresh interpreter and
-thus has no external changes made to import-related attributes in sys.
-
-"""
-from . import util
-from .source import util as source_util
-import decimal
-import imp
-import importlib
-import os
-import py_compile
-import sys
-import timeit
-
-
-def bench(name, cleanup=lambda: None, *, seconds=1, repeat=3):
- """Bench the given statement as many times as necessary until total
- executions take one second."""
- stmt = "__import__({!r})".format(name)
- timer = timeit.Timer(stmt)
- for x in range(repeat):
- total_time = 0
- count = 0
- while total_time < seconds:
- try:
- total_time += timer.timeit(1)
- finally:
- cleanup()
- count += 1
- else:
- # One execution too far
- if total_time > seconds:
- count -= 1
- yield count // seconds
-
-def from_cache(seconds, repeat):
- """sys.modules"""
- name = '<benchmark import>'
- module = imp.new_module(name)
- module.__file__ = '<test>'
- module.__package__ = ''
- with util.uncache(name):
- sys.modules[name] = module
- for result in bench(name, repeat=repeat, seconds=seconds):
- yield result
-
-
-def builtin_mod(seconds, repeat):
- """Built-in module"""
- name = 'errno'
- if name in sys.modules:
- del sys.modules[name]
- # Relying on built-in importer being implicit.
- for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat,
- seconds=seconds):
- yield result
-
-
-def source_wo_bytecode(seconds, repeat):
- """Source w/o bytecode: simple"""
- sys.dont_write_bytecode = True
- try:
- name = '__importlib_test_benchmark__'
- # Clears out sys.modules and puts an entry at the front of sys.path.
- with source_util.create_modules(name) as mapping:
- assert not os.path.exists(imp.cache_from_source(mapping[name]))
- for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat,
- seconds=seconds):
- yield result
- finally:
- sys.dont_write_bytecode = False
-
-
-def decimal_wo_bytecode(seconds, repeat):
- """Source w/o bytecode: decimal"""
- name = 'decimal'
- decimal_bytecode = imp.cache_from_source(decimal.__file__)
- if os.path.exists(decimal_bytecode):
- os.unlink(decimal_bytecode)
- sys.dont_write_bytecode = True
- try:
- for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat,
- seconds=seconds):
- yield result
- finally:
- sys.dont_write_bytecode = False
-
-
-def source_writing_bytecode(seconds, repeat):
- """Source writing bytecode: simple"""
- assert not sys.dont_write_bytecode
- name = '__importlib_test_benchmark__'
- with source_util.create_modules(name) as mapping:
- def cleanup():
- sys.modules.pop(name)
- os.unlink(imp.cache_from_source(mapping[name]))
- for result in bench(name, cleanup, repeat=repeat, seconds=seconds):
- assert not os.path.exists(imp.cache_from_source(mapping[name]))
- yield result
-
-
-def decimal_writing_bytecode(seconds, repeat):
- """Source writing bytecode: decimal"""
- assert not sys.dont_write_bytecode
- name = 'decimal'
- def cleanup():
- sys.modules.pop(name)
- os.unlink(imp.cache_from_source(decimal.__file__))
- for result in bench(name, cleanup, repeat=repeat, seconds=seconds):
- yield result
-
-
-def source_using_bytecode(seconds, repeat):
- """Bytecode w/ source: simple"""
- name = '__importlib_test_benchmark__'
- with source_util.create_modules(name) as mapping:
- py_compile.compile(mapping[name])
- assert os.path.exists(imp.cache_from_source(mapping[name]))
- for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat,
- seconds=seconds):
- yield result
-
-
-def decimal_using_bytecode(seconds, repeat):
- """Bytecode w/ source: decimal"""
- name = 'decimal'
- py_compile.compile(decimal.__file__)
- for result in bench(name, lambda: sys.modules.pop(name), repeat=repeat,
- seconds=seconds):
- yield result
-
-
-def main(import_):
- __builtins__.__import__ = import_
- benchmarks = (from_cache, builtin_mod,
- source_using_bytecode, source_wo_bytecode,
- source_writing_bytecode,
- decimal_using_bytecode, decimal_writing_bytecode,
- decimal_wo_bytecode,)
- seconds = 1
- seconds_plural = 's' if seconds > 1 else ''
- repeat = 3
- header = "Measuring imports/second over {} second{}, best out of {}\n"
- print(header.format(seconds, seconds_plural, repeat))
- for benchmark in benchmarks:
- print(benchmark.__doc__, "[", end=' ')
- sys.stdout.flush()
- results = []
- for result in benchmark(seconds=seconds, repeat=repeat):
- results.append(result)
- print(result, end=' ')
- sys.stdout.flush()
- assert not sys.dont_write_bytecode
- print("]", "best is", format(max(results), ',d'))
-
-
-if __name__ == '__main__':
- import optparse
-
- parser = optparse.OptionParser()
- parser.add_option('-b', '--builtin', dest='builtin', action='store_true',
- default=False, help="use the built-in __import__")
- options, args = parser.parse_args()
- if args:
- raise RuntimeError("unrecognized args: {}".format(args))
- import_ = __import__
- if not options.builtin:
- import_ = importlib.__import__
-
- main(import_)
diff --git a/Lib/importlib/test/extension/test_loader.py b/Lib/importlib/test/extension/test_loader.py
deleted file mode 100644
index 4a783db..0000000
--- a/Lib/importlib/test/extension/test_loader.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from importlib import _bootstrap
-from . import util as ext_util
-from .. import abc
-from .. import util
-
-import sys
-import unittest
-
-
-class LoaderTests(abc.LoaderTests):
-
- """Test load_module() for extension modules."""
-
- def load_module(self, fullname):
- loader = _bootstrap._ExtensionFileLoader(ext_util.NAME,
- ext_util.FILEPATH)
- return loader.load_module(fullname)
-
- def test_module(self):
- with util.uncache(ext_util.NAME):
- module = self.load_module(ext_util.NAME)
- for attr, value in [('__name__', ext_util.NAME),
- ('__file__', ext_util.FILEPATH),
- ('__package__', '')]:
- self.assertEqual(getattr(module, attr), value)
- self.assertTrue(ext_util.NAME in sys.modules)
- self.assertTrue(isinstance(module.__loader__,
- _bootstrap._ExtensionFileLoader))
-
- def test_package(self):
- # Extensions are not found in packages.
- pass
-
- def test_lacking_parent(self):
- # Extensions are not found in packages.
- pass
-
- def test_module_reuse(self):
- with util.uncache(ext_util.NAME):
- module1 = self.load_module(ext_util.NAME)
- module2 = self.load_module(ext_util.NAME)
- self.assertTrue(module1 is module2)
-
- def test_state_after_failure(self):
- # No easy way to trigger a failure after a successful import.
- pass
-
- def test_unloadable(self):
- with self.assertRaises(ImportError):
- self.load_module('asdfjkl;')
-
-
-def test_main():
- from test.support import run_unittest
- run_unittest(LoaderTests)
-
-
-if __name__ == '__main__':
- test_main()
diff --git a/Lib/importlib/test/import_/test_api.py b/Lib/importlib/test/import_/test_api.py
deleted file mode 100644
index 9075d42..0000000
--- a/Lib/importlib/test/import_/test_api.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from . import util
-import unittest
-
-
-class APITest(unittest.TestCase):
-
- """Test API-specific details for __import__ (e.g. raising the right
- exception when passing in an int for the module name)."""
-
- def test_name_requires_rparition(self):
- # Raise TypeError if a non-string is passed in for the module name.
- with self.assertRaises(TypeError):
- util.import_(42)
-
-
-def test_main():
- from test.support import run_unittest
- run_unittest(APITest)
-
-
-if __name__ == '__main__':
- test_main()
diff --git a/Lib/importlib/test/import_/test_packages.py b/Lib/importlib/test/import_/test_packages.py
deleted file mode 100644
index faadc32..0000000
--- a/Lib/importlib/test/import_/test_packages.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from .. import util
-from . import util as import_util
-import sys
-import unittest
-import importlib
-
-
-class ParentModuleTests(unittest.TestCase):
-
- """Importing a submodule should import the parent modules."""
-
- def test_import_parent(self):
- with util.mock_modules('pkg.__init__', 'pkg.module') as mock:
- with util.import_state(meta_path=[mock]):
- module = import_util.import_('pkg.module')
- self.assertTrue('pkg' in sys.modules)
-
- def test_bad_parent(self):
- with util.mock_modules('pkg.module') as mock:
- with util.import_state(meta_path=[mock]):
- with self.assertRaises(ImportError):
- import_util.import_('pkg.module')
-
- def test_module_not_package(self):
- # Try to import a submodule from a non-package should raise ImportError.
- assert not hasattr(sys, '__path__')
- with self.assertRaises(ImportError):
- import_util.import_('sys.no_submodules_here')
-
-
-def test_main():
- from test.support import run_unittest
- run_unittest(ParentModuleTests)
-
-
-if __name__ == '__main__':
- test_main()
diff --git a/Lib/importlib/test/import_/test_path.py b/Lib/importlib/test/import_/test_path.py
deleted file mode 100644
index 2faa231..0000000
--- a/Lib/importlib/test/import_/test_path.py
+++ /dev/null
@@ -1,131 +0,0 @@
-from importlib import _bootstrap
-from importlib import machinery
-from .. import util
-from . import util as import_util
-import imp
-import os
-import sys
-import tempfile
-from test import support
-from types import MethodType
-import unittest
-
-
-class FinderTests(unittest.TestCase):
-
- """Tests for PathFinder."""
-
- def test_failure(self):
- # Test None returned upon not finding a suitable finder.
- module = '<test module>'
- with util.import_state():
- self.assertTrue(machinery.PathFinder.find_module(module) is None)
-
- def test_sys_path(self):
- # Test that sys.path is used when 'path' is None.
- # Implicitly tests that sys.path_importer_cache is used.
- module = '<test module>'
- path = '<test path>'
- importer = util.mock_modules(module)
- with util.import_state(path_importer_cache={path: importer},
- path=[path]):
- loader = machinery.PathFinder.find_module(module)
- self.assertTrue(loader is importer)
-
- def test_path(self):
- # Test that 'path' is used when set.
- # Implicitly tests that sys.path_importer_cache is used.
- module = '<test module>'
- path = '<test path>'
- importer = util.mock_modules(module)
- with util.import_state(path_importer_cache={path: importer}):
- loader = machinery.PathFinder.find_module(module, [path])
- self.assertTrue(loader is importer)
-
- def test_path_hooks(self):
- # Test that sys.path_hooks is used.
- # Test that sys.path_importer_cache is set.
- module = '<test module>'
- path = '<test path>'
- importer = util.mock_modules(module)
- hook = import_util.mock_path_hook(path, importer=importer)
- with util.import_state(path_hooks=[hook]):
- loader = machinery.PathFinder.find_module(module, [path])
- self.assertTrue(loader is importer)
- self.assertTrue(path in sys.path_importer_cache)
- self.assertTrue(sys.path_importer_cache[path] is importer)
-
- def test_path_importer_cache_has_None(self):
- # Test that if sys.path_importer_cache has None that None is returned.
- clear_cache = {path: None for path in sys.path}
- with util.import_state(path_importer_cache=clear_cache):
- for name in ('asynchat', 'sys', '<test module>'):
- self.assertTrue(machinery.PathFinder.find_module(name) is None)
-
- def test_path_importer_cache_has_None_continues(self):
- # Test that having None in sys.path_importer_cache causes the search to
- # continue.
- path = '<test path>'
- module = '<test module>'
- importer = util.mock_modules(module)
- with util.import_state(path=['1', '2'],
- path_importer_cache={'1': None, '2': importer}):
- loader = machinery.PathFinder.find_module(module)
- self.assertTrue(loader is importer)
-
-
-
-class DefaultPathFinderTests(unittest.TestCase):
-
- """Test importlib._bootstrap._DefaultPathFinder."""
-
- def test_implicit_hooks(self):
- # Test that the implicit path hooks are used.
- bad_path = '<path>'
- module = '<module>'
- assert not os.path.exists(bad_path)
- existing_path = tempfile.mkdtemp()
- try:
- with util.import_state():
- nothing = _bootstrap._DefaultPathFinder.find_module(module,
- path=[existing_path])
- self.assertTrue(nothing is None)
- self.assertTrue(existing_path in sys.path_importer_cache)
- result = isinstance(sys.path_importer_cache[existing_path],
- imp.NullImporter)
- self.assertFalse(result)
- nothing = _bootstrap._DefaultPathFinder.find_module(module,
- path=[bad_path])
- self.assertTrue(nothing is None)
- self.assertTrue(bad_path in sys.path_importer_cache)
- self.assertTrue(isinstance(sys.path_importer_cache[bad_path],
- imp.NullImporter))
- finally:
- os.rmdir(existing_path)
-
-
- def test_path_importer_cache_has_None(self):
- # Test that the default hook is used when sys.path_importer_cache
- # contains None for a path.
- module = '<test module>'
- importer = util.mock_modules(module)
- path = '<test path>'
- # XXX Not blackbox.
- original_hook = _bootstrap._DEFAULT_PATH_HOOK
- mock_hook = import_util.mock_path_hook(path, importer=importer)
- _bootstrap._DEFAULT_PATH_HOOK = mock_hook
- try:
- with util.import_state(path_importer_cache={path: None}):
- loader = _bootstrap._DefaultPathFinder.find_module(module,
- path=[path])
- self.assertTrue(loader is importer)
- finally:
- _bootstrap._DEFAULT_PATH_HOOK = original_hook
-
-
-def test_main():
- from test.support import run_unittest
- run_unittest(FinderTests, DefaultPathFinderTests)
-
-if __name__ == '__main__':
- test_main()
diff --git a/Lib/importlib/test/regrtest.py b/Lib/importlib/test/regrtest.py
deleted file mode 100644
index b103ae7d..0000000
--- a/Lib/importlib/test/regrtest.py
+++ /dev/null
@@ -1,35 +0,0 @@
-"""Run Python's standard test suite using importlib.__import__.
-
-Tests known to fail because of assumptions that importlib (properly)
-invalidates are automatically skipped if the entire test suite is run.
-Otherwise all command-line options valid for test.regrtest are also valid for
-this script.
-
-XXX FAILING
- * test_import
- - test_incorrect_code_name
- file name differing between __file__ and co_filename (r68360 on trunk)
- - test_import_by_filename
- exception for trying to import by file name does not match
-
-"""
-import importlib
-import sys
-from test import regrtest
-
-if __name__ == '__main__':
- __builtins__.__import__ = importlib.__import__
-
- exclude = ['--exclude',
- 'test_frozen', # Does not expect __loader__ attribute
- 'test_pkg', # Does not expect __loader__ attribute
- 'test_pydoc', # Does not expect __loader__ attribute
- ]
-
- # Switching on --exclude implies running all test but the ones listed, so
- # only use it when one is not running an explicit test
- if len(sys.argv) == 1:
- # No programmatic way to specify tests to exclude
- sys.argv.extend(exclude)
-
- regrtest.main(quiet=True, verbose2=True)
diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py
index 7b44fa1..1316437 100644
--- a/Lib/importlib/util.py
+++ b/Lib/importlib/util.py
@@ -3,3 +3,19 @@
from ._bootstrap import module_for_loader
from ._bootstrap import set_loader
from ._bootstrap import set_package
+from ._bootstrap import _resolve_name
+
+
+def resolve_name(name, package):
+ """Resolve a relative module name to an absolute one.
+
+ A name without a leading dot is returned unchanged. Otherwise the
+ leading dots are counted and that count is passed as the level, along
+ with the rest of the name and package, to _resolve_name().
+
+ Raises ValueError when name starts with a dot but package is empty or
+ None.
+
+ """
+ if not name.startswith('.'):
+ return name
+ elif not package:
+ # name does start with a dot here; what is missing is the package to
+ # resolve it against, so the message has to say that.
+ raise ValueError('no package specified for {!r} '
+ '(required for relative module names)'.format(name))
+ # Count the leading dots; that count is the relative-import level.
+ level = 0
+ for character in name:
+ if character != '.':
+ break
+ level += 1
+ return _resolve_name(name[level:], package, level)
diff --git a/Lib/inspect.py b/Lib/inspect.py
index 2031755..88f0ee2 100644
--- a/Lib/inspect.py
+++ b/Lib/inspect.py
@@ -22,24 +22,29 @@ Here are some of the useful functions provided by this module:
getouterframes(), getinnerframes() - get info about frames
currentframe() - get the current stack frame
stack(), trace() - get info about frames on the stack or in a traceback
+
+ signature() - get a Signature object for the callable
"""
# This module is in the public domain. No warranties.
-__author__ = 'Ka-Ping Yee <ping@lfw.org>'
-__date__ = '1 Jan 2001'
+__author__ = ('Ka-Ping Yee <ping@lfw.org>',
+ 'Yury Selivanov <yselivanov@sprymix.com>')
-import sys
-import os
-import types
+import imp
+import importlib.machinery
import itertools
-import string
+import linecache
+import os
import re
-import imp
+import sys
import tokenize
-import linecache
+import types
+import warnings
+import functools
+import builtins
from operator import attrgetter
-from collections import namedtuple
+from collections import namedtuple, OrderedDict
# Create constants for the compiler flags in Include/code.h
# We try to get them from dis to avoid duplication, but fall
@@ -433,6 +438,8 @@ ModuleInfo = namedtuple('ModuleInfo', 'name suffix mode module_type')
def getmoduleinfo(path):
"""Get the module name, suffix, mode, and module type for a given file."""
+ warnings.warn('inspect.getmoduleinfo() is deprecated', DeprecationWarning,
+ 2)
filename = os.path.basename(path)
suffixes = [(-len(suffix), suffix, mode, mtype)
for suffix, mode, mtype in imp.get_suffixes()]
@@ -443,20 +450,29 @@ def getmoduleinfo(path):
def getmodulename(path):
"""Return the module name for a given file, or None."""
- info = getmoduleinfo(path)
- if info: return info[0]
+ fname = os.path.basename(path)
+ # Check for paths that look like an actual module file
+ suffixes = [(-len(suffix), suffix)
+ for suffix in importlib.machinery.all_suffixes()]
+ suffixes.sort() # try longest suffixes first, in case they overlap
+ for neglen, suffix in suffixes:
+ if fname.endswith(suffix):
+ return fname[:neglen]
+ return None
def getsourcefile(object):
"""Return the filename that can be used to locate an object's source.
Return None if no way can be identified to get the source.
"""
filename = getfile(object)
- if filename[-4:].lower() in ('.pyc', '.pyo'):
- filename = filename[:-4] + '.py'
- for suffix, mode, kind in imp.get_suffixes():
- if 'b' in mode and filename[-len(suffix):].lower() == suffix:
- # Looks like a binary file. We want to only return a text file.
- return None
+ all_bytecode_suffixes = importlib.machinery.DEBUG_BYTECODE_SUFFIXES[:]
+ all_bytecode_suffixes += importlib.machinery.OPTIMIZED_BYTECODE_SUFFIXES[:]
+ if any(filename.endswith(s) for s in all_bytecode_suffixes):
+ filename = (os.path.splitext(filename)[0] +
+ importlib.machinery.SOURCE_SUFFIXES[0])
+ elif any(filename.endswith(s) for s in
+ importlib.machinery.EXTENSION_SUFFIXES):
+ return None
if os.path.exists(filename):
return filename
# only return a non-existent filename if the module has a PEP 302 loader
@@ -931,6 +947,43 @@ def formatargvalues(args, varargs, varkw, locals,
specs.append(formatvarkw(varkw) + formatvalue(locals[varkw]))
return '(' + ', '.join(specs) + ')'
+def _missing_arguments(f_name, argnames, pos, values):
+ names = [repr(name) for name in argnames if name not in values]
+ missing = len(names)
+ if missing == 1:
+ s = names[0]
+ elif missing == 2:
+ s = "{} and {}".format(*names)
+ else:
+ tail = ", {} and {}".format(names[-2:])
+ del names[-2:]
+ s = ", ".join(names) + tail
+ raise TypeError("%s() missing %i required %s argument%s: %s" %
+ (f_name, missing,
+ "positional" if pos else "keyword-only",
+ "" if missing == 1 else "s", s))
+
+def _too_many(f_name, args, kwonly, varargs, defcount, given, values):
+ atleast = len(args) - defcount
+ kwonly_given = len([arg for arg in kwonly if arg in values])
+ if varargs:
+ plural = atleast != 1
+ sig = "at least %d" % (atleast,)
+ elif defcount:
+ plural = True
+ sig = "from %d to %d" % (atleast, len(args))
+ else:
+ plural = len(args) != 1
+ sig = str(len(args))
+ kwonly_sig = ""
+ if kwonly_given:
+ msg = " positional argument%s (and %d keyword-only argument%s)"
+ kwonly_sig = (msg % ("s" if given != 1 else "", kwonly_given,
+ "s" if kwonly_given != 1 else ""))
+ raise TypeError("%s() takes %s positional argument%s but %d%s %s given" %
+ (f_name, sig, "s" if plural else "", given, kwonly_sig,
+ "was" if given == 1 and not kwonly_given else "were"))
+
def getcallargs(func, *positional, **named):
"""Get the mapping of arguments to values.
@@ -942,65 +995,107 @@ def getcallargs(func, *positional, **named):
f_name = func.__name__
arg2value = {}
+
if ismethod(func) and func.__self__ is not None:
# implicit 'self' (or 'cls' for classmethods) argument
positional = (func.__self__,) + positional
num_pos = len(positional)
- num_total = num_pos + len(named)
num_args = len(args)
num_defaults = len(defaults) if defaults else 0
- for arg, value in zip(args, positional):
- arg2value[arg] = value
+
+ n = min(num_pos, num_args)
+ for i in range(n):
+ arg2value[args[i]] = positional[i]
if varargs:
- if num_pos > num_args:
- arg2value[varargs] = positional[-(num_pos-num_args):]
- else:
- arg2value[varargs] = ()
- elif 0 < num_args < num_pos:
- raise TypeError('%s() takes %s %d positional %s (%d given)' % (
- f_name, 'at most' if defaults else 'exactly', num_args,
- 'arguments' if num_args > 1 else 'argument', num_total))
- elif num_args == 0 and num_total:
- if varkw or kwonlyargs:
- if num_pos:
- # XXX: We should use num_pos, but Python also uses num_total:
- raise TypeError('%s() takes exactly 0 positional arguments '
- '(%d given)' % (f_name, num_total))
- else:
- raise TypeError('%s() takes no arguments (%d given)' %
- (f_name, num_total))
-
- for arg in itertools.chain(args, kwonlyargs):
- if arg in named:
- if arg in arg2value:
- raise TypeError("%s() got multiple values for keyword "
- "argument '%s'" % (f_name, arg))
+ arg2value[varargs] = tuple(positional[n:])
+ possible_kwargs = set(args + kwonlyargs)
+ if varkw:
+ arg2value[varkw] = {}
+ for kw, value in named.items():
+ if kw not in possible_kwargs:
+ if not varkw:
+ raise TypeError("%s() got an unexpected keyword argument %r" %
+ (f_name, kw))
+ arg2value[varkw][kw] = value
+ continue
+ if kw in arg2value:
+ raise TypeError("%s() got multiple values for argument %r" %
+ (f_name, kw))
+ arg2value[kw] = value
+ if num_pos > num_args and not varargs:
+ _too_many(f_name, args, kwonlyargs, varargs, num_defaults,
+ num_pos, arg2value)
+ if num_pos < num_args:
+ req = args[:num_args - num_defaults]
+ for arg in req:
+ if arg not in arg2value:
+ _missing_arguments(f_name, req, True, arg2value)
+ for i, arg in enumerate(args[num_args - num_defaults:]):
+ if arg not in arg2value:
+ arg2value[arg] = defaults[i]
+ missing = 0
+ for kwarg in kwonlyargs:
+ if kwarg not in arg2value:
+ if kwarg in kwonlydefaults:
+ arg2value[kwarg] = kwonlydefaults[kwarg]
else:
- arg2value[arg] = named.pop(arg)
- for kwonlyarg in kwonlyargs:
- if kwonlyarg not in arg2value:
+ missing += 1
+ if missing:
+ _missing_arguments(f_name, kwonlyargs, False, arg2value)
+ return arg2value
+
+ClosureVars = namedtuple('ClosureVars', 'nonlocals globals builtins unbound')
+
+def getclosurevars(func):
+ """
+ Get the mapping of free variables to their current values.
+
+ Returns a named tuple of dicts mapping the current nonlocal, global
+ and builtin references as seen by the body of the function. A final
+ set of unbound names that could not be resolved is also provided.
+ """
+
+ if ismethod(func):
+ func = func.__func__
+
+ if not isfunction(func):
+ raise TypeError("'{!r}' is not a Python function".format(func))
+
+ code = func.__code__
+ # Nonlocal references are named in co_freevars and resolved
+ # by looking them up in __closure__ by positional index
+ if func.__closure__ is None:
+ nonlocal_vars = {}
+ else:
+ nonlocal_vars = {
+ var : cell.cell_contents
+ for var, cell in zip(code.co_freevars, func.__closure__)
+ }
+
+ # Global and builtin references are named in co_names and resolved
+ # by looking them up in __globals__ or __builtins__
+ global_ns = func.__globals__
+ builtin_ns = global_ns.get("__builtins__", builtins.__dict__)
+ if ismodule(builtin_ns):
+ builtin_ns = builtin_ns.__dict__
+ global_vars = {}
+ builtin_vars = {}
+ unbound_names = set()
+ for name in code.co_names:
+ if name in ("None", "True", "False"):
+ # Because these used to be builtins instead of keywords, they
+ # may still show up as name references. We ignore them.
+ continue
+ try:
+ global_vars[name] = global_ns[name]
+ except KeyError:
try:
- arg2value[kwonlyarg] = kwonlydefaults[kwonlyarg]
+ builtin_vars[name] = builtin_ns[name]
except KeyError:
- raise TypeError("%s() needs keyword-only argument %s" %
- (f_name, kwonlyarg))
- if defaults: # fill in any missing values with the defaults
- for arg, value in zip(args[-num_defaults:], defaults):
- if arg not in arg2value:
- arg2value[arg] = value
- if varkw:
- arg2value[varkw] = named
- elif named:
- unexpected = next(iter(named))
- raise TypeError("%s() got an unexpected keyword argument '%s'" %
- (f_name, unexpected))
- unassigned = num_args - len([arg for arg in args if arg in arg2value])
- if unassigned:
- num_required = num_args - num_defaults
- raise TypeError('%s() takes %s %d %s (%d given)' % (
- f_name, 'at least' if defaults else 'exactly', num_required,
- 'arguments' if num_required > 1 else 'argument', num_total))
- return arg2value
+ unbound_names.add(name)
+
+ return ClosureVars(nonlocal_vars, global_vars,
+ builtin_vars, unbound_names)
# -------------------------------------------------- stack frame extraction
@@ -1171,6 +1266,8 @@ def getattr_static(obj, attr, default=_sentinel):
raise AttributeError(attr)
+# ------------------------------------------------ generator introspection
+
GEN_CREATED = 'GEN_CREATED'
GEN_RUNNING = 'GEN_RUNNING'
GEN_SUSPENDED = 'GEN_SUSPENDED'
@@ -1192,3 +1289,785 @@ def getgeneratorstate(generator):
if generator.gi_frame.f_lasti == -1:
return GEN_CREATED
return GEN_SUSPENDED
+
+
+def getgeneratorlocals(generator):
+ """
+ Get the mapping of generator local variables to their current values.
+
+ A dict is returned, with the keys the local variable names and values the
+ bound values."""
+
+ if not isgenerator(generator):
+ raise TypeError("'{!r}' is not a Python generator".format(generator))
+
+ frame = getattr(generator, "gi_frame", None)
+ if frame is not None:
+ return generator.gi_frame.f_locals
+ else:
+ return {}
+
+###############################################################################
+### Function Signature Object (PEP 362)
+###############################################################################
+
+
+_WrapperDescriptor = type(type.__call__)
+_MethodWrapper = type(all.__call__)
+
+_NonUserDefinedCallables = (_WrapperDescriptor,
+ _MethodWrapper,
+ types.BuiltinFunctionType)
+
+
+def _get_user_defined_method(cls, method_name):
+ try:
+ meth = getattr(cls, method_name)
+ except AttributeError:
+ return
+ else:
+ if not isinstance(meth, _NonUserDefinedCallables):
+ # Once '__signature__' will be added to 'C'-level
+ # callables, this check won't be necessary
+ return meth
+
+
+def signature(obj):
+ '''Get a signature object for the passed callable.'''
+
+ if not callable(obj):
+ raise TypeError('{!r} is not a callable object'.format(obj))
+
+ if isinstance(obj, types.MethodType):
+ # In this case we skip the first parameter of the underlying
+ # function (usually `self` or `cls`).
+ sig = signature(obj.__func__)
+ return sig.replace(parameters=tuple(sig.parameters.values())[1:])
+
+ try:
+ sig = obj.__signature__
+ except AttributeError:
+ pass
+ else:
+ if sig is not None:
+ return sig
+
+ try:
+ # Was this function wrapped by a decorator?
+ wrapped = obj.__wrapped__
+ except AttributeError:
+ pass
+ else:
+ return signature(wrapped)
+
+ if isinstance(obj, types.FunctionType):
+ return Signature.from_function(obj)
+
+ if isinstance(obj, functools.partial):
+ sig = signature(obj.func)
+
+ new_params = OrderedDict(sig.parameters.items())
+
+ partial_args = obj.args or ()
+ partial_keywords = obj.keywords or {}
+ try:
+ ba = sig.bind_partial(*partial_args, **partial_keywords)
+ except TypeError as ex:
+ msg = 'partial object {!r} has incorrect arguments'.format(obj)
+ raise ValueError(msg) from ex
+
+ for arg_name, arg_value in ba.arguments.items():
+ param = new_params[arg_name]
+ if arg_name in partial_keywords:
+ # We set a new default value, because the following code
+ # is correct:
+ #
+ # >>> def foo(a): print(a)
+ # >>> print(partial(partial(foo, a=10), a=20)())
+ # 20
+ # >>> print(partial(partial(foo, a=10), a=20)(a=30))
+ # 30
+ #
+ # So, with 'partial' objects, passing a keyword argument is
+ # like setting a new default value for the corresponding
+ # parameter
+ #
+ # We also mark this parameter with '_partial_kwarg'
+ # flag. Later, in '_bind', the 'default' value of this
+ # parameter will be added to 'kwargs', to simulate
+ # the 'functools.partial' real call.
+ new_params[arg_name] = param.replace(default=arg_value,
+ _partial_kwarg=True)
+
+ elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL) and
+ not param._partial_kwarg):
+ new_params.pop(arg_name)
+
+ return sig.replace(parameters=new_params.values())
+
+ sig = None
+ if isinstance(obj, type):
+ # obj is a class or a metaclass
+
+ # First, let's see if it has an overloaded __call__ defined
+ # in its metaclass
+ call = _get_user_defined_method(type(obj), '__call__')
+ if call is not None:
+ sig = signature(call)
+ else:
+ # Now we check if the 'obj' class has a '__new__' method
+ new = _get_user_defined_method(obj, '__new__')
+ if new is not None:
+ sig = signature(new)
+ else:
+ # Finally, we should have at least __init__ implemented
+ init = _get_user_defined_method(obj, '__init__')
+ if init is not None:
+ sig = signature(init)
+ elif not isinstance(obj, _NonUserDefinedCallables):
+ # An object with __call__
+ # We also check that the 'obj' is not an instance of
+ # _WrapperDescriptor or _MethodWrapper to avoid
+ # infinite recursion (and even potential segfault)
+ call = _get_user_defined_method(type(obj), '__call__')
+ if call is not None:
+ sig = signature(call)
+
+ if sig is not None:
+ # For classes and objects we skip the first parameter of their
+ # __call__, __new__, or __init__ methods
+ return sig.replace(parameters=tuple(sig.parameters.values())[1:])
+
+ if isinstance(obj, types.BuiltinFunctionType):
+ # Raise a nicer error message for builtins
+ msg = 'no signature found for builtin function {!r}'.format(obj)
+ raise ValueError(msg)
+
+ raise ValueError('callable {!r} is not supported by signature'.format(obj))
+
+
+class _void:
+ '''A private marker - used in Parameter & Signature'''
+
+
+class _empty:
+ pass
+
+
+class _ParameterKind(int):
+ def __new__(self, *args, name):
+ obj = int.__new__(self, *args)
+ obj._name = name
+ return obj
+
+ def __str__(self):
+ return self._name
+
+ def __repr__(self):
+ return '<_ParameterKind: {!r}>'.format(self._name)
+
+
+_POSITIONAL_ONLY = _ParameterKind(0, name='POSITIONAL_ONLY')
+_POSITIONAL_OR_KEYWORD = _ParameterKind(1, name='POSITIONAL_OR_KEYWORD')
+_VAR_POSITIONAL = _ParameterKind(2, name='VAR_POSITIONAL')
+_KEYWORD_ONLY = _ParameterKind(3, name='KEYWORD_ONLY')
+_VAR_KEYWORD = _ParameterKind(4, name='VAR_KEYWORD')
+
+
+class Parameter:
+ '''Represents a parameter in a function signature.
+
+ Has the following public attributes:
+
+ * name : str
+ The name of the parameter as a string.
+ * default : object
+ The default value for the parameter if specified. If the
+ parameter has no default value, this attribute is not set.
+ * annotation
+ The annotation for the parameter if specified. If the
+ parameter has no annotation, this attribute is not set.
+ * kind : str
+ Describes how argument values are bound to the parameter.
+ Possible values: `Parameter.POSITIONAL_ONLY`,
+ `Parameter.POSITIONAL_OR_KEYWORD`, `Parameter.VAR_POSITIONAL`,
+ `Parameter.KEYWORD_ONLY`, `Parameter.VAR_KEYWORD`.
+ '''
+
+ __slots__ = ('_name', '_kind', '_default', '_annotation', '_partial_kwarg')
+
+ POSITIONAL_ONLY = _POSITIONAL_ONLY
+ POSITIONAL_OR_KEYWORD = _POSITIONAL_OR_KEYWORD
+ VAR_POSITIONAL = _VAR_POSITIONAL
+ KEYWORD_ONLY = _KEYWORD_ONLY
+ VAR_KEYWORD = _VAR_KEYWORD
+
+ empty = _empty
+
+ def __init__(self, name, kind, *, default=_empty, annotation=_empty,
+ _partial_kwarg=False):
+
+ if kind not in (_POSITIONAL_ONLY, _POSITIONAL_OR_KEYWORD,
+ _VAR_POSITIONAL, _KEYWORD_ONLY, _VAR_KEYWORD):
+ raise ValueError("invalid value for 'Parameter.kind' attribute")
+ self._kind = kind
+
+ if default is not _empty:
+ if kind in (_VAR_POSITIONAL, _VAR_KEYWORD):
+ msg = '{} parameters cannot have default values'.format(kind)
+ raise ValueError(msg)
+ self._default = default
+ self._annotation = annotation
+
+ if name is None:
+ if kind != _POSITIONAL_ONLY:
+ raise ValueError("None is not a valid name for a "
+ "non-positional-only parameter")
+ self._name = name
+ else:
+ name = str(name)
+ if kind != _POSITIONAL_ONLY and not name.isidentifier():
+ msg = '{!r} is not a valid parameter name'.format(name)
+ raise ValueError(msg)
+ self._name = name
+
+ self._partial_kwarg = _partial_kwarg
+
+ @property
+ def name(self):
+ return self._name
+
+ @property
+ def default(self):
+ return self._default
+
+ @property
+ def annotation(self):
+ return self._annotation
+
+ @property
+ def kind(self):
+ return self._kind
+
+ def replace(self, *, name=_void, kind=_void, annotation=_void,
+ default=_void, _partial_kwarg=_void):
+ '''Creates a customized copy of the Parameter.'''
+
+ if name is _void:
+ name = self._name
+
+ if kind is _void:
+ kind = self._kind
+
+ if annotation is _void:
+ annotation = self._annotation
+
+ if default is _void:
+ default = self._default
+
+ if _partial_kwarg is _void:
+ _partial_kwarg = self._partial_kwarg
+
+ return type(self)(name, kind, default=default, annotation=annotation,
+ _partial_kwarg=_partial_kwarg)
+
+ def __str__(self):
+ kind = self.kind
+
+ formatted = self._name
+ if kind == _POSITIONAL_ONLY:
+ if formatted is None:
+ formatted = ''
+ formatted = '<{}>'.format(formatted)
+
+ # Add annotation and default value
+ if self._annotation is not _empty:
+ formatted = '{}:{}'.format(formatted,
+ formatannotation(self._annotation))
+
+ if self._default is not _empty:
+ formatted = '{}={}'.format(formatted, repr(self._default))
+
+ if kind == _VAR_POSITIONAL:
+ formatted = '*' + formatted
+ elif kind == _VAR_KEYWORD:
+ formatted = '**' + formatted
+
+ return formatted
+
+ def __repr__(self):
+ return '<{} at {:#x} {!r}>'.format(self.__class__.__name__,
+ id(self), self.name)
+
+ def __eq__(self, other):
+ return (issubclass(other.__class__, Parameter) and
+ self._name == other._name and
+ self._kind == other._kind and
+ self._default == other._default and
+ self._annotation == other._annotation)
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+
+class BoundArguments:
+ '''Result of `Signature.bind` call. Holds the mapping of arguments
+ to the function's parameters.
+
+ Has the following public attributes:
+
+ * arguments : OrderedDict
+ An ordered mutable mapping of parameters' names to arguments' values.
+ Does not contain arguments' default values.
+ * signature : Signature
+ The Signature object that created this instance.
+ * args : tuple
+ Tuple of positional arguments values.
+ * kwargs : dict
+ Dict of keyword arguments values.
+ '''
+
+ def __init__(self, signature, arguments):
+ self.arguments = arguments
+ self._signature = signature
+
+ @property
+ def signature(self):
+ return self._signature
+
+ @property
+ def args(self):
+ args = []
+ for param_name, param in self._signature.parameters.items():
+ if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
+ param._partial_kwarg):
+ # Keyword arguments mapped by 'functools.partial'
+ # (Parameter._partial_kwarg is True) are mapped
+ # in 'BoundArguments.kwargs', along with VAR_KEYWORD &
+ # KEYWORD_ONLY
+ break
+
+ try:
+ arg = self.arguments[param_name]
+ except KeyError:
+ # We're done here. Other arguments
+ # will be mapped in 'BoundArguments.kwargs'
+ break
+ else:
+ if param.kind == _VAR_POSITIONAL:
+ # *args
+ args.extend(arg)
+ else:
+ # plain argument
+ args.append(arg)
+
+ return tuple(args)
+
+ @property
+ def kwargs(self):
+ kwargs = {}
+ kwargs_started = False
+ for param_name, param in self._signature.parameters.items():
+ if not kwargs_started:
+ if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
+ param._partial_kwarg):
+ kwargs_started = True
+ else:
+ if param_name not in self.arguments:
+ kwargs_started = True
+ continue
+
+ if not kwargs_started:
+ continue
+
+ try:
+ arg = self.arguments[param_name]
+ except KeyError:
+ pass
+ else:
+ if param.kind == _VAR_KEYWORD:
+ # **kwargs
+ kwargs.update(arg)
+ else:
+ # plain keyword argument
+ kwargs[param_name] = arg
+
+ return kwargs
+
+ def __eq__(self, other):
+ return (issubclass(other.__class__, BoundArguments) and
+ self.signature == other.signature and
+ self.arguments == other.arguments)
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+
+class Signature:
+ '''A Signature object represents the overall signature of a function.
+ It stores a Parameter object for each parameter accepted by the
+ function, as well as information specific to the function itself.
+
+ A Signature object has the following public attributes and methods:
+
+ * parameters : OrderedDict
+ An ordered mapping of parameters' names to the corresponding
+ Parameter objects (keyword-only arguments are in the same order
+ as listed in `code.co_varnames`).
+ * return_annotation : object
+ The annotation for the return type of the function if specified.
+ If the function has no annotation for its return type, this
+ attribute is not set.
+ * bind(*args, **kwargs) -> BoundArguments
+ Creates a mapping from positional and keyword arguments to
+ parameters.
+ * bind_partial(*args, **kwargs) -> BoundArguments
+ Creates a partial mapping from positional and keyword arguments
+ to parameters (simulating 'functools.partial' behavior.)
+ '''
+
+ __slots__ = ('_return_annotation', '_parameters')
+
+ _parameter_cls = Parameter
+ _bound_arguments_cls = BoundArguments
+
+ empty = _empty
+
+ def __init__(self, parameters=None, *, return_annotation=_empty,
+ __validate_parameters__=True):
+ '''Constructs Signature from the given list of Parameter
+ objects and 'return_annotation'. All arguments are optional.
+ '''
+
+ if parameters is None:
+ params = OrderedDict()
+ else:
+ if __validate_parameters__:
+ params = OrderedDict()
+ top_kind = _POSITIONAL_ONLY
+
+ for idx, param in enumerate(parameters):
+ kind = param.kind
+ if kind < top_kind:
+ msg = 'wrong parameter order: {} before {}'
+ msg = msg.format(top_kind, param.kind)
+ raise ValueError(msg)
+ else:
+ top_kind = kind
+
+ name = param.name
+ if name is None:
+ name = str(idx)
+ param = param.replace(name=name)
+
+ if name in params:
+ msg = 'duplicate parameter name: {!r}'.format(name)
+ raise ValueError(msg)
+ params[name] = param
+ else:
+ params = OrderedDict(((param.name, param)
+ for param in parameters))
+
+ self._parameters = types.MappingProxyType(params)
+ self._return_annotation = return_annotation
+
+ @classmethod
+ def from_function(cls, func):
+ '''Constructs Signature for the given python function'''
+
+ if not isinstance(func, types.FunctionType):
+ raise TypeError('{!r} is not a Python function'.format(func))
+
+ Parameter = cls._parameter_cls
+
+ # Parameter information.
+ func_code = func.__code__
+ pos_count = func_code.co_argcount
+ arg_names = func_code.co_varnames
+ positional = tuple(arg_names[:pos_count])
+ keyword_only_count = func_code.co_kwonlyargcount
+ keyword_only = arg_names[pos_count:(pos_count + keyword_only_count)]
+ annotations = func.__annotations__
+ defaults = func.__defaults__
+ kwdefaults = func.__kwdefaults__
+
+ if defaults:
+ pos_default_count = len(defaults)
+ else:
+ pos_default_count = 0
+
+ parameters = []
+
+ # Non-keyword-only parameters w/o defaults.
+ non_default_count = pos_count - pos_default_count
+ for name in positional[:non_default_count]:
+ annotation = annotations.get(name, _empty)
+ parameters.append(Parameter(name, annotation=annotation,
+ kind=_POSITIONAL_OR_KEYWORD))
+
+ # ... w/ defaults.
+ for offset, name in enumerate(positional[non_default_count:]):
+ annotation = annotations.get(name, _empty)
+ parameters.append(Parameter(name, annotation=annotation,
+ kind=_POSITIONAL_OR_KEYWORD,
+ default=defaults[offset]))
+
+ # *args
+ if func_code.co_flags & 0x04:
+ name = arg_names[pos_count + keyword_only_count]
+ annotation = annotations.get(name, _empty)
+ parameters.append(Parameter(name, annotation=annotation,
+ kind=_VAR_POSITIONAL))
+
+ # Keyword-only parameters.
+ for name in keyword_only:
+ default = _empty
+ if kwdefaults is not None:
+ default = kwdefaults.get(name, _empty)
+
+ annotation = annotations.get(name, _empty)
+ parameters.append(Parameter(name, annotation=annotation,
+ kind=_KEYWORD_ONLY,
+ default=default))
+ # **kwargs
+ if func_code.co_flags & 0x08:
+ index = pos_count + keyword_only_count
+ if func_code.co_flags & 0x04:
+ index += 1
+
+ name = arg_names[index]
+ annotation = annotations.get(name, _empty)
+ parameters.append(Parameter(name, annotation=annotation,
+ kind=_VAR_KEYWORD))
+
+ return cls(parameters,
+ return_annotation=annotations.get('return', _empty),
+ __validate_parameters__=False)
+
+ @property
+ def parameters(self):
+ return self._parameters
+
+ @property
+ def return_annotation(self):
+ return self._return_annotation
+
+ def replace(self, *, parameters=_void, return_annotation=_void):
+ '''Creates a customized copy of the Signature.
+ Pass 'parameters' and/or 'return_annotation' arguments
+ to override them in the new copy.
+ '''
+
+ if parameters is _void:
+ parameters = self.parameters.values()
+
+ if return_annotation is _void:
+ return_annotation = self._return_annotation
+
+ return type(self)(parameters,
+ return_annotation=return_annotation)
+
+ def __eq__(self, other):
+ if (not issubclass(type(other), Signature) or
+ self.return_annotation != other.return_annotation or
+ len(self.parameters) != len(other.parameters)):
+ return False
+
+ other_positions = {param: idx
+ for idx, param in enumerate(other.parameters.keys())}
+
+ for idx, (param_name, param) in enumerate(self.parameters.items()):
+ if param.kind == _KEYWORD_ONLY:
+ try:
+ other_param = other.parameters[param_name]
+ except KeyError:
+ return False
+ else:
+ if param != other_param:
+ return False
+ else:
+ try:
+ other_idx = other_positions[param_name]
+ except KeyError:
+ return False
+ else:
+ if (idx != other_idx or
+ param != other.parameters[param_name]):
+ return False
+
+ return True
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def _bind(self, args, kwargs, *, partial=False):
+ '''Private method. Don't use directly.'''
+
+ arguments = OrderedDict()
+
+ parameters = iter(self.parameters.values())
+ parameters_ex = ()
+ arg_vals = iter(args)
+
+ if partial:
+ # Support for binding arguments to 'functools.partial' objects.
+ # See 'functools.partial' case in 'signature()' implementation
+ # for details.
+ for param_name, param in self.parameters.items():
+ if (param._partial_kwarg and param_name not in kwargs):
+ # Simulating 'functools.partial' behavior
+ kwargs[param_name] = param.default
+
+ while True:
+ # Let's iterate through the positional arguments and corresponding
+ # parameters
+ try:
+ arg_val = next(arg_vals)
+ except StopIteration:
+ # No more positional arguments
+ try:
+ param = next(parameters)
+ except StopIteration:
+ # No more parameters. That's it. Just need to check that
+ # we have no `kwargs` after this while loop
+ break
+ else:
+ if param.kind == _VAR_POSITIONAL:
+ # That's OK, just empty *args. Let's start parsing
+ # kwargs
+ break
+ elif param.name in kwargs:
+ if param.kind == _POSITIONAL_ONLY:
+ msg = '{arg!r} parameter is positional only, ' \
+ 'but was passed as a keyword'
+ msg = msg.format(arg=param.name)
+ raise TypeError(msg) from None
+ parameters_ex = (param,)
+ break
+ elif (param.kind == _VAR_KEYWORD or
+ param.default is not _empty):
+ # That's fine too - we have a default value for this
+ # parameter. So, lets start parsing `kwargs`, starting
+ # with the current parameter
+ parameters_ex = (param,)
+ break
+ else:
+ if partial:
+ parameters_ex = (param,)
+ break
+ else:
+ msg = '{arg!r} parameter lacking default value'
+ msg = msg.format(arg=param.name)
+ raise TypeError(msg) from None
+ else:
+ # We have a positional argument to process
+ try:
+ param = next(parameters)
+ except StopIteration:
+ raise TypeError('too many positional arguments') from None
+ else:
+ if param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY):
+ # Looks like we have no parameter for this positional
+ # argument
+ raise TypeError('too many positional arguments')
+
+ if param.kind == _VAR_POSITIONAL:
+ # We have an '*args'-like argument, let's fill it with
+ # all positional arguments we have left and move on to
+ # the next phase
+ values = [arg_val]
+ values.extend(arg_vals)
+ arguments[param.name] = tuple(values)
+ break
+
+ if param.name in kwargs:
+ raise TypeError('multiple values for argument '
+ '{arg!r}'.format(arg=param.name))
+
+ arguments[param.name] = arg_val
+
+ # Now, we iterate through the remaining parameters to process
+ # keyword arguments
+ kwargs_param = None
+ for param in itertools.chain(parameters_ex, parameters):
+ if param.kind == _POSITIONAL_ONLY:
+ # This should never happen in case of a properly built
+ # Signature object (but let's have this check here
+ # to ensure correct behaviour just in case)
+ raise TypeError('{arg!r} parameter is positional only, '
+ 'but was passed as a keyword'. \
+ format(arg=param.name))
+
+ if param.kind == _VAR_KEYWORD:
+ # Memorize that we have a '**kwargs'-like parameter
+ kwargs_param = param
+ continue
+
+ param_name = param.name
+ try:
+ arg_val = kwargs.pop(param_name)
+ except KeyError:
+ # We have no value for this parameter. It's fine though,
+ # if it has a default value, or it is an '*args'-like
+ # parameter, left alone by the processing of positional
+ # arguments.
+ if (not partial and param.kind != _VAR_POSITIONAL and
+ param.default is _empty):
+ raise TypeError('{arg!r} parameter lacking default value'. \
+ format(arg=param_name)) from None
+
+ else:
+ arguments[param_name] = arg_val
+
+ if kwargs:
+ if kwargs_param is not None:
+ # Process our '**kwargs'-like parameter
+ arguments[kwargs_param.name] = kwargs
+ else:
+ raise TypeError('too many keyword arguments')
+
+ return self._bound_arguments_cls(self, arguments)
+
+ def bind(self, *args, **kwargs):
+ '''Get a BoundArguments object, that maps the passed `args`
+ and `kwargs` to the function's signature. Raises `TypeError`
+ if the passed arguments can not be bound.
+ '''
+ return self._bind(args, kwargs)
+
+ def bind_partial(self, *args, **kwargs):
+ '''Get a BoundArguments object, that partially maps the
+ passed `args` and `kwargs` to the function's signature.
+ Raises `TypeError` if the passed arguments can not be bound.
+ '''
+ return self._bind(args, kwargs, partial=True)
+
+ def __str__(self):
+ result = []
+ render_kw_only_separator = True
+ for idx, param in enumerate(self.parameters.values()):
+ formatted = str(param)
+
+ kind = param.kind
+ if kind == _VAR_POSITIONAL:
+ # OK, we have an '*args'-like parameter, so we won't need
+ # a '*' to separate keyword-only arguments
+ render_kw_only_separator = False
+ elif kind == _KEYWORD_ONLY and render_kw_only_separator:
+ # We have a keyword-only parameter to render and we haven't
+ # rendered an '*args'-like parameter before, so add a '*'
+ # separator to the parameters list ("foo(arg1, *, arg2)" case)
+ result.append('*')
+ # This condition should be only triggered once, so
+ # reset the flag
+ render_kw_only_separator = False
+
+ result.append(formatted)
+
+ rendered = '({})'.format(', '.join(result))
+
+ if self.return_annotation is not _empty:
+ anno = formatannotation(self.return_annotation)
+ rendered += ' -> {}'.format(anno)
+
+ return rendered
diff --git a/Lib/io.py b/Lib/io.py
index a59a75a..cfb70ac 100644
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -58,6 +58,9 @@ from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation,
OpenWrapper = _io.open # for compatibility with _pyio
+# Pretend this exception was created here.
+UnsupportedOperation.__module__ = "io"
+
# for seek()
SEEK_SET = 0
SEEK_CUR = 1
diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py
new file mode 100644
index 0000000..532e44e
--- /dev/null
+++ b/Lib/ipaddress.py
@@ -0,0 +1,2094 @@
+# Copyright 2007 Google Inc.
+# Licensed to PSF under a Contributor Agreement.
+
+"""A fast, lightweight IPv4/IPv6 manipulation library in Python.
+
+This library is used to create/poke/manipulate IPv4 and IPv6 addresses
+and networks.
+
+"""
+
+__version__ = '1.0'
+
+
+import functools
+
+IPV4LENGTH = 32
+IPV6LENGTH = 128
+
class AddressValueError(ValueError):
    """Raised when a value cannot be interpreted as an IP address."""
+
+
class NetmaskValueError(ValueError):
    """Raised when a value cannot be interpreted as a netmask."""
+
+
def ip_address(address):
    """Take an IP string/int and return an object of the correct type.

    Args:
        address: A string or integer, the IP address.  Either IPv4 or
          IPv6 addresses may be supplied; integers less than 2**32 will
          be considered to be IPv4 by default.

    Returns:
        An IPv4Address or IPv6Address object.

    Raises:
        ValueError: if the *address* passed isn't either a v4 or a v6
          address

    """
    # IPv4 is tried first, which is why small integers resolve to IPv4.
    for factory in (IPv4Address, IPv6Address):
        try:
            return factory(address)
        except (AddressValueError, NetmaskValueError):
            continue

    # Wrap in a 1-tuple so that a tuple-valued *address* is formatted as a
    # single value rather than being unpacked as the format arguments.
    raise ValueError('%r does not appear to be an IPv4 or IPv6 address' %
                     (address,))
+
+
def ip_network(address, strict=True):
    """Take an IP string/int and return an object of the correct type.

    Args:
        address: A string or integer, the IP network.  Either IPv4 or
          IPv6 networks may be supplied; integers less than 2**32 will
          be considered to be IPv4 by default.
        strict: Passed through unchanged to the IPv4Network/IPv6Network
          constructor.

    Returns:
        An IPv4Network or IPv6Network object.

    Raises:
        ValueError: if the string passed isn't either a v4 or a v6
          address. Or if the network has host bits set.

    """
    # IPv4 is tried first, which is why small integers resolve to IPv4.
    for factory in (IPv4Network, IPv6Network):
        try:
            return factory(address, strict)
        except (AddressValueError, NetmaskValueError):
            continue

    # Wrap in a 1-tuple so that a tuple-valued *address* is formatted as a
    # single value rather than being unpacked as the format arguments.
    raise ValueError('%r does not appear to be an IPv4 or IPv6 network' %
                     (address,))
+
+
def ip_interface(address):
    """Take an IP string/int and return an object of the correct type.

    Args:
        address: A string or integer, the IP address.  Either IPv4 or
          IPv6 addresses may be supplied; integers less than 2**32 will
          be considered to be IPv4 by default.

    Returns:
        An IPv4Interface or IPv6Interface object.

    Raises:
        ValueError: if the string passed isn't either a v4 or a v6
          address.

    Notes:
        The IPv?Interface classes describe an Address on a particular
        Network, so they're basically a combination of both the Address
        and Network classes.

    """
    # IPv4 is tried first, which is why small integers resolve to IPv4.
    for factory in (IPv4Interface, IPv6Interface):
        try:
            return factory(address)
        except (AddressValueError, NetmaskValueError):
            continue

    # Wrap in a 1-tuple so that a tuple-valued *address* is formatted as a
    # single value rather than being unpacked as the format arguments.
    raise ValueError('%r does not appear to be an IPv4 or IPv6 interface' %
                     (address,))
+
+
def v4_int_to_packed(address):
    """Represent an address as 4 packed bytes in network (big-endian) order.

    Args:
        address: An integer representation of an IPv4 IP address.

    Returns:
        The integer address packed as 4 bytes in network (big-endian) order.

    Raises:
        ValueError: If the integer is negative or too large to be an
          IPv4 IP address.

    """
    try:
        return address.to_bytes(4, 'big')
    except (OverflowError, AttributeError):
        # OverflowError: value is negative or does not fit in 4 bytes.
        # AttributeError: object has no to_bytes(); kept as ValueError for
        # compatibility with the previous catch-all handler, without
        # swallowing KeyboardInterrupt/SystemExit as a bare except did.
        raise ValueError("Address negative or too large for IPv4")
+
+
def v6_int_to_packed(address):
    """Represent an address as 16 packed bytes in network (big-endian) order.

    Args:
        address: An integer representation of an IPv6 IP address.

    Returns:
        The integer address packed as 16 bytes in network (big-endian) order.

    Raises:
        ValueError: If the integer is negative or too large to be an
          IPv6 IP address.

    """
    try:
        return address.to_bytes(16, 'big')
    except (OverflowError, AttributeError):
        # OverflowError: value is negative or does not fit in 16 bytes.
        # AttributeError: object has no to_bytes(); kept as ValueError for
        # compatibility with the previous catch-all handler, without
        # swallowing KeyboardInterrupt/SystemExit as a bare except did.
        raise ValueError("Address negative or too large for IPv6")
+
+
def _split_optional_netmask(address):
    """Helper to split the netmask and raise AddressValueError if needed

    Args:
        address: Any object; its ``str()`` form is split on '/'.

    Returns:
        A list of one element (no '/') or two elements (address, mask).

    Raises:
        AddressValueError: if more than one '/' is present.

    """
    addr = str(address).split('/')
    if len(addr) > 2:
        # 1-tuple so that a tuple-valued *address* is formatted as a single
        # value instead of being unpacked as the format arguments.
        raise AddressValueError("Only one '/' permitted in %r" % (address,))
    return addr
+
+
def _find_address_range(addresses):
    """Return the leading run of consecutive addresses as (first, last).

    Args:
        addresses: a non-empty list of IPv#Address objects.

    Returns:
        A tuple of the first element and the last element reached before
        the integer values (``_ip``) stop increasing by exactly one.

    """
    start = end = addresses[0]
    for candidate in addresses[1:]:
        if candidate._ip != end._ip + 1:
            # Gap found: the consecutive run ends at `end`.
            break
        end = candidate
    return (start, end)
+
+
def _count_righthand_zero_bits(number, bits):
    """Count the trailing zero bits of *number*, capped at *bits*.

    Args:
        number: an integer.
        bits: maximum number of bits to count.

    Returns:
        The index of the lowest set bit within the low *bits* bits, or
        *bits* when none of those bits is set (including number == 0).

    """
    if number == 0:
        return bits
    # Position of the first set bit among the low `bits` bits; falls back to
    # `bits` when every bit of interest is zero, even if higher bits are set.
    set_positions = (pos for pos in range(bits) if (number >> pos) & 1)
    return next(set_positions, bits)
+
+
def summarize_address_range(first, last):
    """Summarize a network range given the first and last IP addresses.

    Example:
        >>> list(summarize_address_range(IPv4Address('192.0.2.0'),
        ...                              IPv4Address('192.0.2.130')))
        ...                                #doctest: +NORMALIZE_WHITESPACE
        [IPv4Network('192.0.2.0/25'), IPv4Network('192.0.2.128/31'),
         IPv4Network('192.0.2.130/32')]

    Args:
        first: the first IPv4Address or IPv6Address in the range.
        last: the last IPv4Address or IPv6Address in the range.

    Returns:
        An iterator of the summarized IPv(4|6) network objects.

    Raise:
        TypeError:
            If the first and last objects are not IP addresses.
            If the first and last objects are not the same version.
        ValueError:
            If the last object is not greater than the first.
            If the version of the first address is not 4 or 6.

    """
    both_addresses = (isinstance(first, _BaseAddress) and
                      isinstance(last, _BaseAddress))
    if not both_addresses:
        raise TypeError('first and last must be IP addresses, not networks')
    if first.version != last.version:
        raise TypeError("%s and %s are not of the same version" % (
                        first, last))
    if first > last:
        raise ValueError('last IP address must be greater than first')

    network_cls = {4: IPv4Network, 6: IPv6Network}.get(first.version)
    if network_cls is None:
        raise ValueError('unknown IP version')

    width = first._max_prefixlen
    low = first._ip
    high = last._ip
    while low <= high:
        # Largest block that is both aligned at `low` and fits in what is
        # left of the range.
        aligned_bits = _count_righthand_zero_bits(low, width)
        fitting_bits = (high - low + 1).bit_length() - 1
        nbits = min(aligned_bits, fitting_bits)
        yield network_cls('%s/%d' % (first, width - nbits))
        low += 1 << nbits
        if low - 1 == network_cls._ALL_ONES:
            # The block just emitted ended at the top of the address space.
            break
        first = first.__class__(low)
+
+
def _collapse_addresses_recursive(addresses):
    """Repeatedly merge adjacent or nested netblocks until stable.

    Example:

        ip1 = IPv4Network('192.0.2.0/26')
        ip2 = IPv4Network('192.0.2.64/26')
        ip3 = IPv4Network('192.0.2.128/26')
        ip4 = IPv4Network('192.0.2.192/26')

        _collapse_addresses_recursive([ip1, ip2, ip3, ip4]) ->
          [IPv4Network('192.0.2.0/24')]

        This shouldn't be called directly; it is called via
          collapse_addresses([]).

    Args:
        addresses: A list of IPv4Network's or IPv6Network's

    Returns:
        A list of IPv4Network's or IPv6Network's depending on what we were
        passed.

    """
    while True:
        merged = []
        previous = None
        changed = False

        for current in addresses:
            if not merged:
                previous = current
                merged.append(current)
                continue

            if (current.network_address >= previous.network_address and
                    current.broadcast_address <= previous.broadcast_address):
                # `current` lies entirely inside `previous`: drop it.
                changed = True
                continue

            parent = previous.supernet()
            if current == list(parent.subnets())[1]:
                # `current` is the upper half of `previous`'s supernet, so
                # the two combine into that supernet.
                merged[-1] = previous = parent
                changed = True
            else:
                previous = current
                merged.append(current)

        addresses = merged
        if not changed:
            return addresses
+
+
def collapse_addresses(addresses):
    """Collapse a list of IP objects.

    Example:
        collapse_addresses([IPv4Network('192.0.2.0/25'),
                            IPv4Network('192.0.2.128/25')]) ->
                           [IPv4Network('192.0.2.0/24')]

    Args:
        addresses: An iterator of IPv4Network or IPv6Network objects.

    Returns:
        An iterator of the collapsed IPv(4|6)Network objects.

    Raises:
        TypeError: If passed a list of mixed version objects.

    """
    def check_same_version(seen, item):
        # Each item is compared only against the most recent entry of the
        # list it is about to join.
        if seen and seen[-1]._version != item._version:
            raise TypeError("%s and %s are not of the same version" % (
                            item, seen[-1]))

    single_ips = []
    networks = []

    # Separate single addresses (including single-address networks) from
    # real networks.
    for ip in addresses:
        if isinstance(ip, _BaseAddress):
            check_same_version(single_ips, ip)
            single_ips.append(ip)
        elif ip._prefixlen == ip._max_prefixlen:
            check_same_version(single_ips, ip)
            try:
                single_ips.append(ip.ip)
            except AttributeError:
                # No `.ip` attribute: fall back to the network address.
                single_ips.append(ip.network_address)
        else:
            check_same_version(networks, ip)
            networks.append(ip)

    # sort and dedup
    single_ips = sorted(set(single_ips))
    networks = sorted(set(networks))

    # Turn each run of consecutive addresses into summarizing networks.
    summarized = []
    position = 0
    while position < len(single_ips):
        (first, last) = _find_address_range(single_ips[position:])
        position = single_ips.index(last) + 1
        summarized.extend(summarize_address_range(first, last))

    ordered = sorted(summarized + networks,
                     key=_BaseNetwork._get_networks_key)
    return iter(_collapse_addresses_recursive(ordered))
+
+
def get_mixed_type_key(obj):
    """Return a key suitable for sorting between networks and addresses.

    Address and Network objects are not sortable by default; they're
    fundamentally different so the expression

        IPv4Address('192.0.2.0') <= IPv4Network('192.0.2.0/24')

    doesn't make any sense.  There are some times however, where you may wish
    to have ipaddress sort these for you anyway. If you need to do this, you
    can use this function as the key= argument to sorted().

    Args:
      obj: either a Network or Address object.
    Returns:
      The object's networks key or address key; NotImplemented for any
      other kind of object.

    """
    # Networks are checked first, mirroring the two key helpers' owners.
    for base, key_name in ((_BaseNetwork, '_get_networks_key'),
                           (_BaseAddress, '_get_address_key')):
        if isinstance(obj, base):
            return getattr(obj, key_name)()
    return NotImplemented
+
+
class _TotalOrderingMixin:
    # Derives !=, <=, > and >= from the __eq__ and __lt__ that subclasses
    # must provide, propagating NotImplemented where the base results do.
    # We avoid functools.total_ordering because it doesn't handle
    # NotImplemented correctly yet (http://bugs.python.org/issue10042)

    def __eq__(self, other):
        raise NotImplementedError

    def __lt__(self, other):
        raise NotImplementedError

    def __ne__(self, other):
        equal = self.__eq__(other)
        return equal if equal is NotImplemented else not equal

    def __le__(self, other):
        less = self.__lt__(other)
        if less is not NotImplemented and less:
            return less
        # Not strictly less (or undecidable): the answer is whatever
        # equality says, which may itself be NotImplemented.
        return self.__eq__(other)

    def __gt__(self, other):
        less = self.__lt__(other)
        if less is NotImplemented:
            return NotImplemented
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return NotImplemented
        return not less and not equal

    def __ge__(self, other):
        less = self.__lt__(other)
        return less if less is NotImplemented else not less
+
class _IPAddressBase(_TotalOrderingMixin):

    """The mother class.

    Holds helpers shared by address and network objects.  The methods
    here read ``_version``, ``_max_prefixlen``, ``_ALL_ONES``,
    ``_prefixlen`` and call ``_string_from_ip_int`` /
    ``_explode_shorthand_ip_string``; none of these are defined in this
    class, so they must be supplied by subclasses or mixins.

    """

    @property
    def exploded(self):
        """Return the longhand version of the IP address as a string."""
        return self._explode_shorthand_ip_string()

    @property
    def compressed(self):
        """Return the shorthand version of the IP address as a string."""
        return str(self)

    @property
    def version(self):
        """Placeholder; always raises NotImplementedError in this class."""
        # Plain %s: the previous '%200s' padded the type repr to 200 columns.
        msg = '%s has no version specified' % (type(self),)
        raise NotImplementedError(msg)

    def _check_int_address(self, address):
        """Raise AddressValueError unless 0 <= address <= self._ALL_ONES."""
        if address < 0:
            msg = "%d (< 0) is not permitted as an IPv%d address"
            raise AddressValueError(msg % (address, self._version))
        if address > self._ALL_ONES:
            msg = "%d (>= 2**%d) is not permitted as an IPv%d address"
            raise AddressValueError(msg % (address, self._max_prefixlen,
                                           self._version))

    def _check_packed_address(self, address, expected_len):
        """Raise AddressValueError unless len(address) == expected_len."""
        address_len = len(address)
        if address_len != expected_len:
            msg = "%r (len %d != %d) is not permitted as an IPv%d address"
            raise AddressValueError(msg % (address, address_len,
                                           expected_len, self._version))

    def _ip_int_from_prefix(self, prefixlen=None):
        """Turn the prefix length netmask into a int for comparison.

        Args:
            prefixlen: An integer, the prefix length.  Defaults to
              self._prefixlen when None.

        Returns:
            An integer.

        """
        if prefixlen is None:
            prefixlen = self._prefixlen
        return self._ALL_ONES ^ (self._ALL_ONES >> prefixlen)

    def _prefix_from_ip_int(self, ip_int, mask=32):
        """Return prefix length from the decimal netmask.

        Args:
            ip_int: An integer, the IP address.
            mask: The netmask.  Defaults to 32.

        Returns:
            An integer, the prefix length.

        """
        return mask - _count_righthand_zero_bits(ip_int, mask)

    def _ip_string_from_prefix(self, prefixlen=None):
        """Turn a prefix length into a dotted decimal string.

        Args:
            prefixlen: An integer, the netmask prefix length.  Defaults to
              self._prefixlen when None.

        Returns:
            A string, the dotted decimal netmask string.

        """
        # `is None` rather than truthiness: an explicit prefix length of 0
        # is a real value and must not be replaced by self._prefixlen.
        if prefixlen is None:
            prefixlen = self._prefixlen
        return self._string_from_ip_int(self._ip_int_from_prefix(prefixlen))
+
+
class _BaseAddress(_IPAddressBase):

    """A generic IP object.

    This IP class contains the version independent methods which are
    used by single IP addresses.

    The methods read ``_ip`` and ``_version`` and call
    ``_string_from_ip_int``; these are not set or defined in this class.

    """

    def __init__(self, address):
        """Reject non-bytes input whose string form contains a '/'.

        Raises:
            AddressValueError: if a '/' is present.

        """
        if (not isinstance(address, bytes)
            and '/' in str(address)):
            # 1-tuple so a tuple-valued address is formatted as one value.
            raise AddressValueError("Unexpected '/' in %r" % (address,))

    def __int__(self):
        return self._ip

    def __eq__(self, other):
        try:
            return (self._ip == other._ip
                    and self._version == other._version)
        except AttributeError:
            # `other` lacks _ip/_version: let Python try the reflected op.
            return NotImplemented

    def __lt__(self, other):
        # The type check comes first so that an arbitrary object yields the
        # intended TypeError instead of an AttributeError on `_version`.
        if not isinstance(other, _BaseAddress):
            raise TypeError('%s and %s are not of the same type' % (
                             self, other))
        if self._version != other._version:
            raise TypeError('%s and %s are not of the same version' % (
                             self, other))
        if self._ip != other._ip:
            return self._ip < other._ip
        return False

    # Shorthand for Integer addition and subtraction. This is not
    # meant to ever support addition/subtraction of addresses.
    def __add__(self, other):
        if not isinstance(other, int):
            return NotImplemented
        return self.__class__(int(self) + other)

    def __sub__(self, other):
        if not isinstance(other, int):
            return NotImplemented
        return self.__class__(int(self) - other)

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, str(self))

    def __str__(self):
        return str(self._string_from_ip_int(self._ip))

    def __hash__(self):
        return hash(hex(int(self._ip)))

    def _get_address_key(self):
        """Return the (version, self) tuple used as a sort key."""
        return (self._version, self)
+
+
class _BaseNetwork(_IPAddressBase):

    """A generic IP network object.

    This IP class contains the version independent methods which are
    used by networks.

    The methods read ``network_address``, ``netmask``, ``_prefixlen``,
    ``_version``, ``_max_prefixlen`` and ``_ALL_ONES`` and call
    ``_is_valid_netmask``; these are not set or defined in this class.

    """
    def __init__(self, address):
        # Memo for derived values (see broadcast_address and hostmask).
        self._cache = {}

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, str(self))

    def __str__(self):
        return '%s/%d' % (self.network_address, self.prefixlen)

    def hosts(self):
        """Generate Iterator over usable hosts in a network.

        This is like __iter__ except it doesn't return the network
        or broadcast addresses.

        """
        network = int(self.network_address)
        broadcast = int(self.broadcast_address)
        for x in range(network + 1, broadcast):
            yield self._address_class(x)

    def __iter__(self):
        network = int(self.network_address)
        broadcast = int(self.broadcast_address)
        for x in range(network, broadcast + 1):
            yield self._address_class(x)

    def __getitem__(self, n):
        network = int(self.network_address)
        broadcast = int(self.broadcast_address)
        if n >= 0:
            if network + n > broadcast:
                raise IndexError
            return self._address_class(network + n)
        else:
            # Negative indices count back from the broadcast address
            # (-1 is the broadcast address itself).
            n += 1
            if broadcast + n < network:
                raise IndexError
            return self._address_class(broadcast + n)

    def __lt__(self, other):
        # The type check comes first so that an arbitrary object yields the
        # intended TypeError instead of an AttributeError on `_version`.
        if not isinstance(other, _BaseNetwork):
            raise TypeError('%s and %s are not of the same type' % (
                             self, other))
        if self._version != other._version:
            raise TypeError('%s and %s are not of the same version' % (
                             self, other))
        if self.network_address != other.network_address:
            return self.network_address < other.network_address
        if self.netmask != other.netmask:
            return self.netmask < other.netmask
        return False

    def __eq__(self, other):
        try:
            return (self._version == other._version and
                    self.network_address == other.network_address and
                    int(self.netmask) == int(other.netmask))
        except AttributeError:
            return NotImplemented

    def __hash__(self):
        return hash(int(self.network_address) ^ int(self.netmask))

    def __contains__(self, other):
        # always false if one is v4 and the other is v6.
        if self._version != other._version:
            return False
        # dealing with another network.
        if isinstance(other, _BaseNetwork):
            return False
        # dealing with another address
        else:
            # address
            return (int(self.network_address) <= int(other._ip) <=
                    int(self.broadcast_address))

    def overlaps(self, other):
        """Tell if self is partly contained in other."""
        return self.network_address in other or (
            self.broadcast_address in other or (
                other.network_address in self or (
                    other.broadcast_address in self)))

    @property
    def broadcast_address(self):
        x = self._cache.get('broadcast_address')
        if x is None:
            x = self._address_class(int(self.network_address) |
                                    int(self.hostmask))
            self._cache['broadcast_address'] = x
        return x

    @property
    def hostmask(self):
        x = self._cache.get('hostmask')
        if x is None:
            x = self._address_class(int(self.netmask) ^ self._ALL_ONES)
            self._cache['hostmask'] = x
        return x

    @property
    def with_prefixlen(self):
        return '%s/%d' % (self.network_address, self._prefixlen)

    @property
    def with_netmask(self):
        return '%s/%s' % (self.network_address, self.netmask)

    @property
    def with_hostmask(self):
        return '%s/%s' % (self.network_address, self.hostmask)

    @property
    def num_addresses(self):
        """Number of hosts in the current subnet."""
        return int(self.broadcast_address) - int(self.network_address) + 1

    @property
    def _address_class(self):
        # Returning bare address objects (rather than interfaces) allows for
        # more consistent behaviour across the network address, broadcast
        # address and individual host addresses.
        # Plain %s: the previous '%200s' padded the type repr to 200 columns.
        msg = '%s has no associated address class' % (type(self),)
        raise NotImplementedError(msg)

    @property
    def prefixlen(self):
        return self._prefixlen

    def address_exclude(self, other):
        """Remove an address from a larger block.

        For example:

            addr1 = ip_network('192.0.2.0/28')
            addr2 = ip_network('192.0.2.1/32')
            addr1.address_exclude(addr2) =
                [IPv4Network('192.0.2.0/32'), IPv4Network('192.0.2.2/31'),
                IPv4Network('192.0.2.4/30'), IPv4Network('192.0.2.8/29')]

        or IPv6:

            addr1 = ip_network('2001:db8::1/32')
            addr2 = ip_network('2001:db8::1/128')
            addr1.address_exclude(addr2) =
                [ip_network('2001:db8::1/128'),
                ip_network('2001:db8::2/127'),
                ip_network('2001:db8::4/126'),
                ip_network('2001:db8::8/125'),
                ...
                ip_network('2001:db8:8000::/33')]

        Args:
            other: An IPv4Network or IPv6Network object of the same type.

        Returns:
            An iterator of the IPv(4|6)Network objects which is self
            minus other.

        Raises:
            TypeError: If self and other are of differing address
              versions, or if other is not a network object.
            ValueError: If other is not completely contained by self.

        """
        if not self._version == other._version:
            raise TypeError("%s and %s are not of the same version" % (
                             self, other))

        if not isinstance(other, _BaseNetwork):
            raise TypeError("%s is not a network object" % other)

        if not (other.network_address >= self.network_address and
                other.broadcast_address <= self.broadcast_address):
            raise ValueError('%s not contained in %s' % (other, self))
        if other == self:
            # Nothing is left after removing the whole block.  A plain
            # return ends the generator; raising StopIteration here would
            # surface as RuntimeError under PEP 479.
            return

        # Make sure we're comparing the network of other.
        other = other.__class__('%s/%s' % (other.network_address,
                                           other.prefixlen))

        # Halve the block repeatedly: emit the half that does not contain
        # `other` and keep splitting the half that does.
        s1, s2 = self.subnets()
        while s1 != other and s2 != other:
            if (other.network_address >= s1.network_address and
                other.broadcast_address <= s1.broadcast_address):
                yield s2
                s1, s2 = s1.subnets()
            elif (other.network_address >= s2.network_address and
                  other.broadcast_address <= s2.broadcast_address):
                yield s1
                s1, s2 = s2.subnets()
            else:
                # If we got here, there's a bug somewhere.
                raise AssertionError('Error performing exclusion: '
                                     's1: %s s2: %s other: %s' %
                                     (s1, s2, other))
        if s1 == other:
            yield s2
        elif s2 == other:
            yield s1
        else:
            # If we got here, there's a bug somewhere.
            raise AssertionError('Error performing exclusion: '
                                 's1: %s s2: %s other: %s' %
                                 (s1, s2, other))

    def compare_networks(self, other):
        """Compare two IP objects.

        This is only concerned about the comparison of the integer
        representation of the network addresses.  This means that the
        host bits aren't considered at all in this method.  If you want
        to compare host bits, you can easily enough do a
        'HostA._ip < HostB._ip'

        Args:
            other: An IP object.

        Returns:
            If the IP versions of self and other are the same, returns:

            -1 if self < other:
              eg: IPv4Network('192.0.2.0/25') < IPv4Network('192.0.2.128/25')
              IPv6Network('2001:db8::1000/124') <
                  IPv6Network('2001:db8::2000/124')
            0 if self == other
              eg: IPv4Network('192.0.2.0/24') == IPv4Network('192.0.2.0/24')
              IPv6Network('2001:db8::1000/124') ==
                  IPv6Network('2001:db8::1000/124')
            1 if self > other
              eg: IPv4Network('192.0.2.128/25') > IPv4Network('192.0.2.0/25')
                  IPv6Network('2001:db8::2000/124') >
                      IPv6Network('2001:db8::1000/124')

          Raises:
              TypeError if the IP versions are different.

        """
        # does this need to raise a ValueError?
        if self._version != other._version:
            raise TypeError('%s and %s are not of the same type' % (
                             self, other))
        # self._version == other._version below here:
        if self.network_address < other.network_address:
            return -1
        if self.network_address > other.network_address:
            return 1
        # self.network_address == other.network_address below here:
        if self.netmask < other.netmask:
            return -1
        if self.netmask > other.netmask:
            return 1
        return 0

    def _get_networks_key(self):
        """Network-only key function.

        Returns an object that identifies this address' network and
        netmask. This function is a suitable "key" argument for sorted()
        and list.sort().

        """
        return (self._version, self.network_address, self.netmask)

    def subnets(self, prefixlen_diff=1, new_prefix=None):
        """The subnets which join to make the current subnet.

        In the case that self contains only one IP
        (self._prefixlen == 32 for IPv4 or self._prefixlen == 128
        for IPv6), yield an iterator with just ourself.

        Args:
            prefixlen_diff: An integer, the amount the prefix length
              should be increased by. This should not be set if
              new_prefix is also set.
            new_prefix: The desired new prefix length. This must be a
              larger number (smaller prefix) than the existing prefix.
              This should not be set if prefixlen_diff is also set.

        Returns:
            An iterator of IPv(4|6) objects.

        Raises:
            ValueError: The prefixlen_diff is too small or too large.
                OR
            prefixlen_diff and new_prefix are both set or new_prefix
              is a smaller number than the current prefix (smaller
              number means a larger network)

        """
        if self._prefixlen == self._max_prefixlen:
            yield self
            return

        if new_prefix is not None:
            if new_prefix < self._prefixlen:
                raise ValueError('new prefix must be longer')
            if prefixlen_diff != 1:
                raise ValueError('cannot set prefixlen_diff and new_prefix')
            prefixlen_diff = new_prefix - self._prefixlen

        if prefixlen_diff < 0:
            raise ValueError('prefix length diff must be > 0')
        new_prefixlen = self._prefixlen + prefixlen_diff

        if not self._is_valid_netmask(str(new_prefixlen)):
            raise ValueError(
                'prefix length diff %d is invalid for netblock %s' % (
                    new_prefixlen, self))

        first = self.__class__('%s/%s' %
                                 (self.network_address,
                                  self._prefixlen + prefixlen_diff))

        yield first
        current = first
        while True:
            # Stop once the subnet just produced reaches the end of self.
            broadcast = current.broadcast_address
            if broadcast == self.broadcast_address:
                return
            # The next subnet starts right after the previous one ends.
            new_addr = self._address_class(int(broadcast) + 1)
            current = self.__class__('%s/%s' % (new_addr,
                                                new_prefixlen))

            yield current

    def supernet(self, prefixlen_diff=1, new_prefix=None):
        """The supernet containing the current network.

        Args:
            prefixlen_diff: An integer, the amount the prefix length of
              the network should be decreased by.  For example, given a
              /24 network and a prefixlen_diff of 3, a supernet with a
              /21 netmask is returned.
            new_prefix: The desired new prefix length.  This should not
              be set if prefixlen_diff is also set.

        Returns:
            A network object of the same class as self; self itself when
            the prefix length is already 0.

        Raises:
            ValueError: If self.prefixlen - prefixlen_diff < 0. I.e., you have
              a negative prefix length.
                OR
            If prefixlen_diff and new_prefix are both set or new_prefix is a
              larger number than the current prefix (larger number means a
              smaller network)

        """
        if self._prefixlen == 0:
            return self

        if new_prefix is not None:
            if new_prefix > self._prefixlen:
                raise ValueError('new prefix must be shorter')
            if prefixlen_diff != 1:
                raise ValueError('cannot set prefixlen_diff and new_prefix')
            prefixlen_diff = self._prefixlen - new_prefix

        if self.prefixlen - prefixlen_diff < 0:
            raise ValueError(
                'current prefixlen is %d, cannot have a prefixlen_diff of %d' %
                (self.prefixlen, prefixlen_diff))
        # TODO (pmoody): optimize this.
        # Built with strict=False first (self's network address is passed
        # with the shorter prefix), then rebuilt from the resulting
        # network address.
        t = self.__class__('%s/%d' % (self.network_address,
                                      self.prefixlen - prefixlen_diff),
                                     strict=False)
        return t.__class__('%s/%d' % (t.network_address, t.prefixlen))

    @property
    def is_multicast(self):
        """Test if the address is reserved for multicast use.

        Returns:
            A boolean, True if the address is a multicast address.
            See RFC 2373 2.7 for details.

        """
        return (self.network_address.is_multicast and
                self.broadcast_address.is_multicast)

    @property
    def is_reserved(self):
        """Test if the address is otherwise IETF reserved.

        Returns:
            A boolean, True if the address is within one of the
            reserved IPv6 Network ranges.

        """
        return (self.network_address.is_reserved and
                self.broadcast_address.is_reserved)

    @property
    def is_link_local(self):
        """Test if the address is reserved for link-local.

        Returns:
            A boolean, True if the address is reserved per RFC 4291.

        """
        return (self.network_address.is_link_local and
                self.broadcast_address.is_link_local)

    @property
    def is_private(self):
        """Test if this address is allocated for private networks.

        Returns:
            A boolean, True if the address is reserved per RFC 4193.

        """
        return (self.network_address.is_private and
                self.broadcast_address.is_private)

    @property
    def is_unspecified(self):
        """Test if the address is unspecified.

        Returns:
            A boolean, True if this is the unspecified address as defined in
            RFC 2373 2.5.2.

        """
        return (self.network_address.is_unspecified and
                self.broadcast_address.is_unspecified)

    @property
    def is_loopback(self):
        """Test if the address is a loopback address.

        Returns:
            A boolean, True if the address is a loopback address as defined in
            RFC 2373 2.5.3.

        """
        return (self.network_address.is_loopback and
                self.broadcast_address.is_loopback)
+
+
class _BaseV4:

    """Base IPv4 object.

    The following methods are used by IPv4 objects in both single IP
    addresses and networks.

    """

    # Equivalent to 255.255.255.255 or 32 bits of 1's.
    _ALL_ONES = (2**IPV4LENGTH) - 1
    _DECIMAL_DIGITS = frozenset('0123456789')

    # the valid octets for host and netmasks. only useful for IPv4.
    _valid_mask_octets = frozenset((255, 254, 252, 248, 240, 224, 192, 128, 0))

    def __init__(self, address):
        # `address` is not used here; only the version constants are set.
        self._version = 4
        self._max_prefixlen = IPV4LENGTH

    def _explode_shorthand_ip_string(self):
        return str(self)

    def _ip_int_from_string(self, ip_str):
        """Turn the given IP string into an integer for comparison.

        Args:
            ip_str: A string, the IP ip_str.

        Returns:
            The IP ip_str as an integer.

        Raises:
            AddressValueError: if ip_str isn't a valid IPv4 Address.

        """
        if not ip_str:
            raise AddressValueError('Address cannot be empty')

        octets = ip_str.split('.')
        if len(octets) != 4:
            raise AddressValueError("Expected 4 octets in %r" % ip_str)

        try:
            return int.from_bytes(map(self._parse_octet, octets), 'big')
        except ValueError as exc:
            raise AddressValueError("%s in %r" % (exc, ip_str)) from None

    def _parse_octet(self, octet_str):
        """Convert a decimal octet into an integer.

        Args:
            octet_str: A string, the number to parse.

        Returns:
            The octet as an integer.

        Raises:
            ValueError: if the octet isn't strictly a decimal from [0..255].

        """
        if not octet_str:
            raise ValueError("Empty octet not permitted")
        # Whitelist the characters, since int() allows a lot of bizarre stuff.
        if not self._DECIMAL_DIGITS.issuperset(octet_str):
            msg = "Only decimal digits permitted in %r"
            raise ValueError(msg % octet_str)
        # We do the length check second, since the invalid character error
        # is likely to be more informative for the user
        if len(octet_str) > 3:
            msg = "At most 3 characters permitted in %r"
            raise ValueError(msg % octet_str)
        # Convert to integer (we know digits are legal)
        octet_int = int(octet_str, 10)
        # Any octets that look like they *might* be written in octal,
        # and which don't look exactly the same in both octal and
        # decimal are rejected as ambiguous
        if octet_int > 7 and octet_str[0] == '0':
            msg = "Ambiguous (octal/decimal) value in %r not permitted"
            raise ValueError(msg % octet_str)
        if octet_int > 255:
            raise ValueError("Octet %d (> 255) not permitted" % octet_int)
        return octet_int

    def _string_from_ip_int(self, ip_int):
        """Turns a 32-bit integer into dotted decimal notation.

        Args:
            ip_int: An integer, the IP address.

        Returns:
            The IP address as a string in dotted decimal notation.

        """
        return '.'.join(map(str, ip_int.to_bytes(4, 'big')))

    def _is_valid_netmask(self, netmask):
        """Verify that the netmask is valid.

        Args:
            netmask: A string, either a prefix or dotted decimal
              netmask.

        Returns:
            A boolean, True if the prefix represents a valid IPv4
            netmask.

        """
        mask = netmask.split('.')
        if len(mask) == 4:
            try:
                octets = [int(x) for x in mask]
            except ValueError:
                # Found something that isn't an integer
                return False
            if any(x not in self._valid_mask_octets for x in octets):
                return False
            # Octets must not increase from left to right.  The comparison
            # is done on the integer values: comparing the raw strings
            # orders them lexicographically (e.g. '00' > '0').
            return all(later <= earlier
                       for earlier, later in zip(octets, octets[1:]))
        try:
            netmask = int(netmask)
        except ValueError:
            return False
        return 0 <= netmask <= self._max_prefixlen

    def _is_hostmask(self, ip_str):
        """Test if the IP string is a hostmask (rather than a netmask).

        Args:
            ip_str: A string, the potential hostmask.

        Returns:
            A boolean, True if the IP string is a hostmask.

        """
        bits = ip_str.split('.')
        try:
            parts = [x for x in map(int, bits) if x in self._valid_mask_octets]
        except ValueError:
            return False
        if len(parts) != len(bits):
            return False
        # A hostmask grows towards the right, a netmask towards the left.
        if parts[0] < parts[-1]:
            return True
        return False

    @property
    def max_prefixlen(self):
        return self._max_prefixlen

    @property
    def version(self):
        return self._version
+
+
+class IPv4Address(_BaseV4, _BaseAddress):
+
+ """Represent and manipulate single IPv4 Addresses."""
+
+ def __init__(self, address):
+
+ """
+ Args:
+ address: A string or integer representing the IP
+
+ Additionally, an integer can be passed, so
+ IPv4Address('192.0.2.1') == IPv4Address(3221225985).
+ or, more generally
+ IPv4Address(int(IPv4Address('192.0.2.1'))) ==
+ IPv4Address('192.0.2.1')
+
+ Raises:
+ AddressValueError: If ipaddress isn't a valid IPv4 address.
+
+ """
+ _BaseAddress.__init__(self, address)
+ _BaseV4.__init__(self, address)
+
+ # Efficient constructor from integer.
+ if isinstance(address, int):
+ self._check_int_address(address)
+ self._ip = address
+ return
+
+ # Constructing from a packed address
+ if isinstance(address, bytes):
+ self._check_packed_address(address, 4)
+ self._ip = int.from_bytes(address, 'big')
+ return
+
+ # Assume input argument to be string or any object representation
+ # which converts into a formatted IP string.
+ addr_str = str(address)
+ self._ip = self._ip_int_from_string(addr_str)
+
+ @property
+ def packed(self):
+ """The binary representation of this address."""
+ return v4_int_to_packed(self._ip)
+
+ @property
+ def is_reserved(self):
+ """Test if the address is otherwise IETF reserved.
+
+ Returns:
+ A boolean, True if the address is within the
+ reserved IPv4 Network range.
+
+ """
+ reserved_network = IPv4Network('240.0.0.0/4')
+ return self in reserved_network
+
+ @property
+ def is_private(self):
+ """Test if this address is allocated for private networks.
+
+ Returns:
+ A boolean, True if the address is reserved per RFC 1918.
+
+ """
+ private_10 = IPv4Network('10.0.0.0/8')
+ private_172 = IPv4Network('172.16.0.0/12')
+ private_192 = IPv4Network('192.168.0.0/16')
+ return (self in private_10 or
+ self in private_172 or
+ self in private_192)
+
+ @property
+ def is_multicast(self):
+ """Test if the address is reserved for multicast use.
+
+ Returns:
+ A boolean, True if the address is multicast.
+ See RFC 3171 for details.
+
+ """
+ multicast_network = IPv4Network('224.0.0.0/4')
+ return self in multicast_network
+
+ @property
+ def is_unspecified(self):
+ """Test if the address is unspecified.
+
+ Returns:
+ A boolean, True if this is the unspecified address as defined in
+ RFC 5735 3.
+
+ """
+ unspecified_address = IPv4Address('0.0.0.0')
+ return self == unspecified_address
+
+ @property
+ def is_loopback(self):
+ """Test if the address is a loopback address.
+
+ Returns:
+ A boolean, True if the address is a loopback per RFC 3330.
+
+ """
+ loopback_network = IPv4Network('127.0.0.0/8')
+ return self in loopback_network
+
+ @property
+ def is_link_local(self):
+ """Test if the address is reserved for link-local.
+
+ Returns:
+ A boolean, True if the address is link-local per RFC 3927.
+
+ """
+ linklocal_network = IPv4Network('169.254.0.0/16')
+ return self in linklocal_network
+
+
+class IPv4Interface(IPv4Address):
+
+ def __init__(self, address):
+ if isinstance(address, (bytes, int)):
+ IPv4Address.__init__(self, address)
+ self.network = IPv4Network(self._ip)
+ self._prefixlen = self._max_prefixlen
+ return
+
+ addr = _split_optional_netmask(address)
+ IPv4Address.__init__(self, addr[0])
+
+ self.network = IPv4Network(address, strict=False)
+ self._prefixlen = self.network._prefixlen
+
+ self.netmask = self.network.netmask
+ self.hostmask = self.network.hostmask
+
+ def __str__(self):
+ return '%s/%d' % (self._string_from_ip_int(self._ip),
+ self.network.prefixlen)
+
+ def __eq__(self, other):
+ address_equal = IPv4Address.__eq__(self, other)
+ if not address_equal or address_equal is NotImplemented:
+ return address_equal
+ try:
+ return self.network == other.network
+ except AttributeError:
+ # An interface with an associated network is NOT the
+ # same as an unassociated address. That's why the hash
+ # takes the extra info into account.
+ return False
+
+ def __lt__(self, other):
+ address_less = IPv4Address.__lt__(self, other)
+ if address_less is NotImplemented:
+ return NotImplemented
+ try:
+ return self.network < other.network
+ except AttributeError:
+ # We *do* allow addresses and interfaces to be sorted. The
+ # unassociated address is considered less than all interfaces.
+ return False
+
+ def __hash__(self):
+ return self._ip ^ self._prefixlen ^ int(self.network.network_address)
+
+ @property
+ def ip(self):
+ return IPv4Address(self._ip)
+
+ @property
+ def with_prefixlen(self):
+ return '%s/%s' % (self._string_from_ip_int(self._ip),
+ self._prefixlen)
+
+ @property
+ def with_netmask(self):
+ return '%s/%s' % (self._string_from_ip_int(self._ip),
+ self.netmask)
+
+ @property
+ def with_hostmask(self):
+ return '%s/%s' % (self._string_from_ip_int(self._ip),
+ self.hostmask)
+
+
+class IPv4Network(_BaseV4, _BaseNetwork):
+
+ """This class represents and manipulates 32-bit IPv4 network + addresses.
+
+ Attributes: [examples for IPv4Network('192.0.2.0/27')]
+ .network_address: IPv4Address('192.0.2.0')
+ .hostmask: IPv4Address('0.0.0.31')
+ .broadcast_address: IPv4Address('192.0.2.31')
+ .netmask: IPv4Address('255.255.255.224')
+ .prefixlen: 27
+
+ """
+ # Class to use when creating address objects
+ _address_class = IPv4Address
+
+ def __init__(self, address, strict=True):
+
+ """Instantiate a new IPv4 network object.
+
+ Args:
+ address: A string or integer representing the IP [& network].
+ '192.0.2.0/24'
+ '192.0.2.0/255.255.255.0'
+ '192.0.2.0/0.0.0.255'
+ are all functionally the same in IPv4. Similarly,
+ '192.0.2.1'
+ '192.0.2.1/255.255.255.255'
+ '192.0.2.1/32'
+ are also functionally equivalent. That is to say, failing to
+ provide a subnetmask will create an object with a mask of /32.
+
+ If the mask (portion after the / in the argument) is given in
+ dotted quad form, it is treated as a netmask if it starts with a
+ non-zero field (e.g. /255.0.0.0 == /8) and as a hostmask if it
+ starts with a zero field (e.g. 0.255.255.255 == /8), with the
+ single exception of an all-zero mask which is treated as a
+ netmask == /0. If no mask is given, a default of /32 is used.
+
+ Additionally, an integer can be passed, so
+ IPv4Network('192.0.2.1') == IPv4Network(3221225985)
+ or, more generally
+ IPv4Network(int(IPv4Network('192.0.2.1'))) ==
+ IPv4Network('192.0.2.1')
+
+ Raises:
+ AddressValueError: If address isn't a valid IPv4 address.
+ NetmaskValueError: If the netmask isn't valid for
+ an IPv4 address.
+ ValueError: If strict is True and a network address is not
+ supplied.
+
+ """
+
+ _BaseV4.__init__(self, address)
+ _BaseNetwork.__init__(self, address)
+
+ # Constructing from a packed address
+ if isinstance(address, bytes):
+ self.network_address = IPv4Address(address)
+ self._prefixlen = self._max_prefixlen
+ self.netmask = IPv4Address(self._ALL_ONES)
+ #fixme: address/network test here
+ return
+
+ # Efficient constructor from integer.
+ if isinstance(address, int):
+ self.network_address = IPv4Address(address)
+ self._prefixlen = self._max_prefixlen
+ self.netmask = IPv4Address(self._ALL_ONES)
+ #fixme: address/network test here.
+ return
+
+ # Assume input argument to be string or any object representation
+ # which converts into a formatted IP prefix string.
+ addr = _split_optional_netmask(address)
+ self.network_address = IPv4Address(self._ip_int_from_string(addr[0]))
+
+ if len(addr) == 2:
+ mask = addr[1].split('.')
+
+ if len(mask) == 4:
+ # We have dotted decimal netmask.
+ if self._is_valid_netmask(addr[1]):
+ self.netmask = IPv4Address(self._ip_int_from_string(
+ addr[1]))
+ elif self._is_hostmask(addr[1]):
+ self.netmask = IPv4Address(
+ self._ip_int_from_string(addr[1]) ^ self._ALL_ONES)
+ else:
+ raise NetmaskValueError('%r is not a valid netmask'
+ % addr[1])
+
+ self._prefixlen = self._prefix_from_ip_int(int(self.netmask))
+ else:
+ # We have a netmask in prefix length form.
+ if not self._is_valid_netmask(addr[1]):
+ raise NetmaskValueError('%r is not a valid netmask'
+ % addr[1])
+ self._prefixlen = int(addr[1])
+ self.netmask = IPv4Address(self._ip_int_from_prefix(
+ self._prefixlen))
+ else:
+ self._prefixlen = self._max_prefixlen
+ self.netmask = IPv4Address(self._ip_int_from_prefix(
+ self._prefixlen))
+
+ if strict:
+ if (IPv4Address(int(self.network_address) & int(self.netmask)) !=
+ self.network_address):
+ raise ValueError('%s has host bits set' % self)
+ self.network_address = IPv4Address(int(self.network_address) &
+ int(self.netmask))
+
+ if self._prefixlen == (self._max_prefixlen - 1):
+ self.hosts = self.__iter__
+
+
+class _BaseV6:
+
+ """Base IPv6 object.
+
+ The following methods are used by IPv6 objects in both single IP
+ addresses and networks.
+
+ """
+
+ _ALL_ONES = (2**IPV6LENGTH) - 1
+ _HEXTET_COUNT = 8
+ _HEX_DIGITS = frozenset('0123456789ABCDEFabcdef')
+
+ def __init__(self, address):
+ self._version = 6
+ self._max_prefixlen = IPV6LENGTH
+
+ def _ip_int_from_string(self, ip_str):
+ """Turn an IPv6 ip_str into an integer.
+
+ Args:
+ ip_str: A string, the IPv6 ip_str.
+
+ Returns:
+ An int, the IPv6 address
+
+ Raises:
+ AddressValueError: if ip_str isn't a valid IPv6 Address.
+
+ """
+ if not ip_str:
+ raise AddressValueError('Address cannot be empty')
+
+ parts = ip_str.split(':')
+
+ # An IPv6 address needs at least 2 colons (3 parts).
+ _min_parts = 3
+ if len(parts) < _min_parts:
+ msg = "At least %d parts expected in %r" % (_min_parts, ip_str)
+ raise AddressValueError(msg)
+
+ # If the address has an IPv4-style suffix, convert it to hexadecimal.
+ if '.' in parts[-1]:
+ try:
+ ipv4_int = IPv4Address(parts.pop())._ip
+ except AddressValueError as exc:
+ raise AddressValueError("%s in %r" % (exc, ip_str)) from None
+ parts.append('%x' % ((ipv4_int >> 16) & 0xFFFF))
+ parts.append('%x' % (ipv4_int & 0xFFFF))
+
+ # An IPv6 address can't have more than 8 colons (9 parts).
+ # The extra colon comes from using the "::" notation for a single
+ # leading or trailing zero part.
+ _max_parts = self._HEXTET_COUNT + 1
+ if len(parts) > _max_parts:
+ msg = "At most %d colons permitted in %r" % (_max_parts-1, ip_str)
+ raise AddressValueError(msg)
+
+ # Disregarding the endpoints, find '::' with nothing in between.
+ # This indicates that a run of zeroes has been skipped.
+ skip_index = None
+ for i in range(1, len(parts) - 1):
+ if not parts[i]:
+ if skip_index is not None:
+ # Can't have more than one '::'
+ msg = "At most one '::' permitted in %r" % ip_str
+ raise AddressValueError(msg)
+ skip_index = i
+
+ # parts_hi is the number of parts to copy from above/before the '::'
+ # parts_lo is the number of parts to copy from below/after the '::'
+ if skip_index is not None:
+ # If we found a '::', then check if it also covers the endpoints.
+ parts_hi = skip_index
+ parts_lo = len(parts) - skip_index - 1
+ if not parts[0]:
+ parts_hi -= 1
+ if parts_hi:
+ msg = "Leading ':' only permitted as part of '::' in %r"
+ raise AddressValueError(msg % ip_str) # ^: requires ^::
+ if not parts[-1]:
+ parts_lo -= 1
+ if parts_lo:
+ msg = "Trailing ':' only permitted as part of '::' in %r"
+ raise AddressValueError(msg % ip_str) # :$ requires ::$
+ parts_skipped = self._HEXTET_COUNT - (parts_hi + parts_lo)
+ if parts_skipped < 1:
+ msg = "Expected at most %d other parts with '::' in %r"
+ raise AddressValueError(msg % (self._HEXTET_COUNT-1, ip_str))
+ else:
+ # Otherwise, allocate the entire address to parts_hi. The
+ # endpoints could still be empty, but _parse_hextet() will check
+ # for that.
+ if len(parts) != self._HEXTET_COUNT:
+ msg = "Exactly %d parts expected without '::' in %r"
+ raise AddressValueError(msg % (self._HEXTET_COUNT, ip_str))
+ if not parts[0]:
+ msg = "Leading ':' only permitted as part of '::' in %r"
+ raise AddressValueError(msg % ip_str) # ^: requires ^::
+ if not parts[-1]:
+ msg = "Trailing ':' only permitted as part of '::' in %r"
+ raise AddressValueError(msg % ip_str) # :$ requires ::$
+ parts_hi = len(parts)
+ parts_lo = 0
+ parts_skipped = 0
+
+ try:
+ # Now, parse the hextets into a 128-bit integer.
+ ip_int = 0
+ for i in range(parts_hi):
+ ip_int <<= 16
+ ip_int |= self._parse_hextet(parts[i])
+ ip_int <<= 16 * parts_skipped
+ for i in range(-parts_lo, 0):
+ ip_int <<= 16
+ ip_int |= self._parse_hextet(parts[i])
+ return ip_int
+ except ValueError as exc:
+ raise AddressValueError("%s in %r" % (exc, ip_str)) from None
+
+ def _parse_hextet(self, hextet_str):
+ """Convert an IPv6 hextet string into an integer.
+
+ Args:
+ hextet_str: A string, the number to parse.
+
+ Returns:
+ The hextet as an integer.
+
+ Raises:
+ ValueError: if the input isn't strictly a hex number from
+ [0..FFFF].
+
+ """
+ # Whitelist the characters, since int() allows a lot of bizarre stuff.
+ if not self._HEX_DIGITS.issuperset(hextet_str):
+ raise ValueError("Only hex digits permitted in %r" % hextet_str)
+ # We do the length check second, since the invalid character error
+ # is likely to be more informative for the user
+ if len(hextet_str) > 4:
+ msg = "At most 4 characters permitted in %r"
+ raise ValueError(msg % hextet_str)
+ # Length check means we can skip checking the integer value
+ return int(hextet_str, 16)
+
+ def _compress_hextets(self, hextets):
+ """Compresses a list of hextets.
+
+ Compresses a list of strings, replacing the longest continuous
+ sequence of "0" in the list with "" and adding empty strings at
+ the beginning or at the end of the string such that subsequently
+ calling ":".join(hextets) will produce the compressed version of
+ the IPv6 address.
+
+ Args:
+ hextets: A list of strings, the hextets to compress.
+
+ Returns:
+ A list of strings.
+
+ """
+ best_doublecolon_start = -1
+ best_doublecolon_len = 0
+ doublecolon_start = -1
+ doublecolon_len = 0
+ for index, hextet in enumerate(hextets):
+ if hextet == '0':
+ doublecolon_len += 1
+ if doublecolon_start == -1:
+ # Start of a sequence of zeros.
+ doublecolon_start = index
+ if doublecolon_len > best_doublecolon_len:
+ # This is the longest sequence of zeros so far.
+ best_doublecolon_len = doublecolon_len
+ best_doublecolon_start = doublecolon_start
+ else:
+ doublecolon_len = 0
+ doublecolon_start = -1
+
+ if best_doublecolon_len > 1:
+ best_doublecolon_end = (best_doublecolon_start +
+ best_doublecolon_len)
+ # For zeros at the end of the address.
+ if best_doublecolon_end == len(hextets):
+ hextets += ['']
+ hextets[best_doublecolon_start:best_doublecolon_end] = ['']
+ # For zeros at the beginning of the address.
+ if best_doublecolon_start == 0:
+ hextets = [''] + hextets
+
+ return hextets
+
+ def _string_from_ip_int(self, ip_int=None):
+ """Turns a 128-bit integer into hexadecimal notation.
+
+ Args:
+ ip_int: An integer, the IP address.
+
+ Returns:
+ A string, the hexadecimal representation of the address.
+
+ Raises:
+ ValueError: The address is bigger than 128 bits of all ones.
+
+ """
+ if ip_int is None:
+ ip_int = int(self._ip)
+
+ if ip_int > self._ALL_ONES:
+ raise ValueError('IPv6 address is too large')
+
+ hex_str = '%032x' % ip_int
+ hextets = ['%x' % int(hex_str[x:x+4], 16) for x in range(0, 32, 4)]
+
+ hextets = self._compress_hextets(hextets)
+ return ':'.join(hextets)
+
+ def _explode_shorthand_ip_string(self):
+ """Expand a shortened IPv6 address.
+
+ Args:
+ None; the address string is taken from this object (self).
+
+ Returns:
+ A string, the expanded IPv6 address.
+
+ """
+ if isinstance(self, IPv6Network):
+ ip_str = str(self.network_address)
+ elif isinstance(self, IPv6Interface):
+ ip_str = str(self.ip)
+ else:
+ ip_str = str(self)
+
+ ip_int = self._ip_int_from_string(ip_str)
+ hex_str = '%032x' % ip_int
+ parts = [hex_str[x:x+4] for x in range(0, 32, 4)]
+ if isinstance(self, (_BaseNetwork, IPv6Interface)):
+ return '%s/%d' % (':'.join(parts), self._prefixlen)
+ return ':'.join(parts)
+
+ @property
+ def max_prefixlen(self):
+ return self._max_prefixlen
+
+ @property
+ def version(self):
+ return self._version
+
+
+class IPv6Address(_BaseV6, _BaseAddress):
+
+ """Represent and manipulate single IPv6 Addresses."""
+
+ def __init__(self, address):
+ """Instantiate a new IPv6 address object.
+
+ Args:
+ address: A string or integer representing the IP
+
+ Additionally, an integer can be passed, so
+ IPv6Address('2001:db8::') ==
+ IPv6Address(42540766411282592856903984951653826560)
+ or, more generally
+ IPv6Address(int(IPv6Address('2001:db8::'))) ==
+ IPv6Address('2001:db8::')
+
+ Raises:
+ AddressValueError: If address isn't a valid IPv6 address.
+
+ """
+ _BaseAddress.__init__(self, address)
+ _BaseV6.__init__(self, address)
+
+ # Efficient constructor from integer.
+ if isinstance(address, int):
+ self._check_int_address(address)
+ self._ip = address
+ return
+
+ # Constructing from a packed address
+ if isinstance(address, bytes):
+ self._check_packed_address(address, 16)
+ self._ip = int.from_bytes(address, 'big')
+ return
+
+ # Assume input argument to be string or any object representation
+ # which converts into a formatted IP string.
+ addr_str = str(address)
+ self._ip = self._ip_int_from_string(addr_str)
+
+ @property
+ def packed(self):
+ """The binary representation of this address."""
+ return v6_int_to_packed(self._ip)
+
+ @property
+ def is_multicast(self):
+ """Test if the address is reserved for multicast use.
+
+ Returns:
+ A boolean, True if the address is a multicast address.
+ See RFC 2373 2.7 for details.
+
+ """
+ multicast_network = IPv6Network('ff00::/8')
+ return self in multicast_network
+
+ @property
+ def is_reserved(self):
+ """Test if the address is otherwise IETF reserved.
+
+ Returns:
+ A boolean, True if the address is within one of the
+ reserved IPv6 Network ranges.
+
+ """
+ reserved_networks = [IPv6Network('::/8'), IPv6Network('100::/8'),
+ IPv6Network('200::/7'), IPv6Network('400::/6'),
+ IPv6Network('800::/5'), IPv6Network('1000::/4'),
+ IPv6Network('4000::/3'), IPv6Network('6000::/3'),
+ IPv6Network('8000::/3'), IPv6Network('A000::/3'),
+ IPv6Network('C000::/3'), IPv6Network('E000::/4'),
+ IPv6Network('F000::/5'), IPv6Network('F800::/6'),
+ IPv6Network('FE00::/9')]
+
+ return any(self in x for x in reserved_networks)
+
+ @property
+ def is_link_local(self):
+ """Test if the address is reserved for link-local.
+
+ Returns:
+ A boolean, True if the address is reserved per RFC 4291.
+
+ """
+ linklocal_network = IPv6Network('fe80::/10')
+ return self in linklocal_network
+
+ @property
+ def is_site_local(self):
+ """Test if the address is reserved for site-local.
+
+ Note that the site-local address space has been deprecated by RFC 3879.
+ Use is_private to test if this address is in the space of unique local
+ addresses as defined by RFC 4193.
+
+ Returns:
+ A boolean, True if the address is reserved per RFC 3513 2.5.6.
+
+ """
+ sitelocal_network = IPv6Network('fec0::/10')
+ return self in sitelocal_network
+
+ @property
+ def is_private(self):
+ """Test if this address is allocated for private networks.
+
+ Returns:
+ A boolean, True if the address is reserved per RFC 4193.
+
+ """
+ private_network = IPv6Network('fc00::/7')
+ return self in private_network
+
+ @property
+ def is_unspecified(self):
+ """Test if the address is unspecified.
+
+ Returns:
+ A boolean, True if this is the unspecified address as defined in
+ RFC 2373 2.5.2.
+
+ """
+ return self._ip == 0
+
+ @property
+ def is_loopback(self):
+ """Test if the address is a loopback address.
+
+ Returns:
+ A boolean, True if the address is a loopback address as defined in
+ RFC 2373 2.5.3.
+
+ """
+ return self._ip == 1
+
+ @property
+ def ipv4_mapped(self):
+ """Return the IPv4 mapped address.
+
+ Returns:
+ If the IPv6 address is a v4 mapped address, return the
+ IPv4 mapped address. Return None otherwise.
+
+ """
+ if (self._ip >> 32) != 0xFFFF:
+ return None
+ return IPv4Address(self._ip & 0xFFFFFFFF)
+
+ @property
+ def teredo(self):
+ """Tuple of embedded teredo IPs.
+
+ Returns:
+ Tuple of the (server, client) IPs or None if the address
+ doesn't appear to be a teredo address (doesn't start with
+ 2001::/32)
+
+ """
+ if (self._ip >> 96) != 0x20010000:
+ return None
+ return (IPv4Address((self._ip >> 64) & 0xFFFFFFFF),
+ IPv4Address(~self._ip & 0xFFFFFFFF))
+
+ @property
+ def sixtofour(self):
+ """Return the IPv4 6to4 embedded address.
+
+ Returns:
+ The IPv4 6to4-embedded address if present or None if the
+ address doesn't appear to contain a 6to4 embedded address.
+
+ """
+ if (self._ip >> 112) != 0x2002:
+ return None
+ return IPv4Address((self._ip >> 80) & 0xFFFFFFFF)
+
+
+class IPv6Interface(IPv6Address):
+
+ def __init__(self, address):
+ if isinstance(address, (bytes, int)):
+ IPv6Address.__init__(self, address)
+ self.network = IPv6Network(self._ip)
+ self._prefixlen = self._max_prefixlen
+ return
+
+ addr = _split_optional_netmask(address)
+ IPv6Address.__init__(self, addr[0])
+ self.network = IPv6Network(address, strict=False)
+ self.netmask = self.network.netmask
+ self._prefixlen = self.network._prefixlen
+ self.hostmask = self.network.hostmask
+
+ def __str__(self):
+ return '%s/%d' % (self._string_from_ip_int(self._ip),
+ self.network.prefixlen)
+
+ def __eq__(self, other):
+ address_equal = IPv6Address.__eq__(self, other)
+ if not address_equal or address_equal is NotImplemented:
+ return address_equal
+ try:
+ return self.network == other.network
+ except AttributeError:
+ # An interface with an associated network is NOT the
+ # same as an unassociated address. That's why the hash
+ # takes the extra info into account.
+ return False
+
+ def __lt__(self, other):
+ address_less = IPv6Address.__lt__(self, other)
+ if address_less is NotImplemented:
+ return NotImplemented
+ try:
+ return self.network < other.network
+ except AttributeError:
+ # We *do* allow addresses and interfaces to be sorted. The
+ # unassociated address is considered less than all interfaces.
+ return False
+
+ def __hash__(self):
+ return self._ip ^ self._prefixlen ^ int(self.network.network_address)
+
+ @property
+ def ip(self):
+ return IPv6Address(self._ip)
+
+ @property
+ def with_prefixlen(self):
+ return '%s/%s' % (self._string_from_ip_int(self._ip),
+ self._prefixlen)
+
+ @property
+ def with_netmask(self):
+ return '%s/%s' % (self._string_from_ip_int(self._ip),
+ self.netmask)
+
+ @property
+ def with_hostmask(self):
+ return '%s/%s' % (self._string_from_ip_int(self._ip),
+ self.hostmask)
+
+ @property
+ def is_unspecified(self):
+ return self._ip == 0 and self.network.is_unspecified
+
+ @property
+ def is_loopback(self):
+ return self._ip == 1 and self.network.is_loopback
+
+
+class IPv6Network(_BaseV6, _BaseNetwork):
+
+ """This class represents and manipulates 128-bit IPv6 networks.
+
+ Attributes: [examples for IPv6Network('2001:db8::1000/124')]
+ .network_address: IPv6Address('2001:db8::1000')
+ .hostmask: IPv6Address('::f')
+ .broadcast_address: IPv6Address('2001:db8::100f')
+ .netmask: IPv6Address('ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0')
+ .prefixlen: 124
+
+ """
+
+ # Class to use when creating address objects
+ _address_class = IPv6Address
+
+ def __init__(self, address, strict=True):
+ """Instantiate a new IPv6 Network object.
+
+ Args:
+ address: A string or integer representing the IPv6 network or the
+ IP and prefix/netmask.
+ '2001:db8::/128'
+ '2001:db8:0000:0000:0000:0000:0000:0000/128'
+ '2001:db8::'
+ are all functionally the same in IPv6. That is to say,
+ failing to provide a subnetmask will create an object with
+ a mask of /128.
+
+ Additionally, an integer can be passed, so
+ IPv6Network('2001:db8::') ==
+ IPv6Network(42540766411282592856903984951653826560)
+ or, more generally
+ IPv6Network(int(IPv6Network('2001:db8::'))) ==
+ IPv6Network('2001:db8::')
+
+ strict: A boolean. If true, ensure that we have been passed
+ a true network address, e.g. 2001:db8::1000/124, and not an
+ IP address on a network, e.g. 2001:db8::1/124.
+
+ Raises:
+ AddressValueError: If address isn't a valid IPv6 address.
+ NetmaskValueError: If the netmask isn't valid for
+ an IPv6 address.
+ ValueError: If strict was True and a network address was not
+ supplied.
+
+ """
+ _BaseV6.__init__(self, address)
+ _BaseNetwork.__init__(self, address)
+
+ # Efficient constructor from integer.
+ if isinstance(address, int):
+ self.network_address = IPv6Address(address)
+ self._prefixlen = self._max_prefixlen
+ self.netmask = IPv6Address(self._ALL_ONES)
+ return
+
+ # Constructing from a packed address
+ if isinstance(address, bytes):
+ self.network_address = IPv6Address(address)
+ self._prefixlen = self._max_prefixlen
+ self.netmask = IPv6Address(self._ALL_ONES)
+ return
+
+ # Assume input argument to be string or any object representation
+ # which converts into a formatted IP prefix string.
+ addr = _split_optional_netmask(address)
+
+ self.network_address = IPv6Address(self._ip_int_from_string(addr[0]))
+
+ if len(addr) == 2:
+ if self._is_valid_netmask(addr[1]):
+ self._prefixlen = int(addr[1])
+ else:
+ raise NetmaskValueError('%r is not a valid netmask'
+ % addr[1])
+ else:
+ self._prefixlen = self._max_prefixlen
+
+ self.netmask = IPv6Address(self._ip_int_from_prefix(self._prefixlen))
+ if strict:
+ if (IPv6Address(int(self.network_address) & int(self.netmask)) !=
+ self.network_address):
+ raise ValueError('%s has host bits set' % self)
+ self.network_address = IPv6Address(int(self.network_address) &
+ int(self.netmask))
+
+ if self._prefixlen == (self._max_prefixlen - 1):
+ self.hosts = self.__iter__
+
+ def _is_valid_netmask(self, prefixlen):
+ """Verify that the netmask/prefixlen is valid.
+
+ Args:
+ prefixlen: A string, the netmask in prefix length format.
+
+ Returns:
+ A boolean, True if the prefix represents a valid IPv6
+ netmask.
+
+ """
+ try:
+ prefixlen = int(prefixlen)
+ except ValueError:
+ return False
+ return 0 <= prefixlen <= self._max_prefixlen
+
+ @property
+ def is_site_local(self):
+ """Test if the address is reserved for site-local.
+
+ Note that the site-local address space has been deprecated by RFC 3879.
+ Use is_private to test if this address is in the space of unique local
+ addresses as defined by RFC 4193.
+
+ Returns:
+ A boolean, True if the address is reserved per RFC 3513 2.5.6.
+
+ """
+ return (self.network_address.is_site_local and
+ self.broadcast_address.is_site_local)
diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py
index dc1155b..07fd696 100644
--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
@@ -121,8 +121,7 @@ def py_scanstring(s, end, strict=True,
msg = "Invalid \\uXXXX escape"
raise ValueError(errmsg(msg, s, end))
uni = int(esc, 16)
- # Check for surrogate pair on UCS-4 systems
- if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+ if 0xd800 <= uni <= 0xdbff:
msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
if not s[end + 5:end + 7] == '\\u':
raise ValueError(errmsg(msg, s, end))
diff --git a/Lib/lib2to3/__main__.py b/Lib/lib2to3/__main__.py
new file mode 100644
index 0000000..80688ba
--- /dev/null
+++ b/Lib/lib2to3/__main__.py
@@ -0,0 +1,4 @@
+import sys
+from .main import main
+
+sys.exit(main("lib2to3.fixes"))
diff --git a/Lib/lib2to3/fixer_base.py b/Lib/lib2to3/fixer_base.py
index afc0467..b176056 100644
--- a/Lib/lib2to3/fixer_base.py
+++ b/Lib/lib2to3/fixer_base.py
@@ -27,7 +27,6 @@ class BaseFix(object):
pattern_tree = None # Tree representation of the pattern
options = None # Options object passed to initializer
filename = None # The filename (set by set_filename)
- logger = None # A logger (set by set_filename)
numbers = itertools.count(1) # For new_name()
used_names = set() # A set of all used NAMEs
order = "post" # Does the fixer prefer pre- or post-order traversal
@@ -70,12 +69,11 @@ class BaseFix(object):
with_tree=True)
def set_filename(self, filename):
- """Set the filename, and a logger derived from it.
+ """Set the filename.
The main refactoring tool should call this.
"""
self.filename = filename
- self.logger = logging.getLogger(filename)
def match(self, node):
"""Returns match for a given parse tree node.
diff --git a/Lib/lib2to3/pytree.py b/Lib/lib2to3/pytree.py
index fa4942f3..17cbf0a 100644
--- a/Lib/lib2to3/pytree.py
+++ b/Lib/lib2to3/pytree.py
@@ -109,26 +109,6 @@ class Base(object):
"""
raise NotImplementedError
- def set_prefix(self, prefix):
- """
- Set the prefix for the node (see Leaf class).
-
- DEPRECATED; use the prefix property directly.
- """
- warnings.warn("set_prefix() is deprecated; use the prefix property",
- DeprecationWarning, stacklevel=2)
- self.prefix = prefix
-
- def get_prefix(self):
- """
- Return the prefix for the node (see Leaf class).
-
- DEPRECATED; use the prefix property directly.
- """
- warnings.warn("get_prefix() is deprecated; use the prefix property",
- DeprecationWarning, stacklevel=2)
- return self.prefix
-
def replace(self, new):
"""Replace this node with a new one in the parent."""
assert self.parent is not None, str(self)
diff --git a/Lib/lib2to3/refactor.py b/Lib/lib2to3/refactor.py
index 38fb8ed..201e193 100644
--- a/Lib/lib2to3/refactor.py
+++ b/Lib/lib2to3/refactor.py
@@ -566,7 +566,7 @@ class RefactoringTool(object):
block_lineno = None
indent = None
lineno = 0
- for line in input.splitlines(True):
+ for line in input.splitlines(keepends=True):
lineno += 1
if line.lstrip().startswith(self.PS1):
if block is not None:
@@ -610,7 +610,7 @@ class RefactoringTool(object):
filename, lineno, err.__class__.__name__, err)
return block
if self.refactor_tree(tree, filename):
- new = str(tree).splitlines(True)
+ new = str(tree).splitlines(keepends=True)
# Undo the adjustment of the line numbers in wrap_toks() below.
clipped, new = new[:lineno-1], new[lineno-1:]
assert clipped == ["\n"] * (lineno-1), clipped
diff --git a/Lib/lib2to3/tests/test_pytree.py b/Lib/lib2to3/tests/test_pytree.py
index ac7d900..a2ab1f3 100644
--- a/Lib/lib2to3/tests/test_pytree.py
+++ b/Lib/lib2to3/tests/test_pytree.py
@@ -31,23 +31,6 @@ class TestNodes(support.TestCase):
"""Unit tests for nodes (Base, Leaf, Node)."""
- if sys.version_info >= (2,6):
- # warnings.catch_warnings is new in 2.6.
- def test_deprecated_prefix_methods(self):
- l = pytree.Leaf(100, "foo")
- with warnings.catch_warnings(record=True) as w:
- warnings.simplefilter("always", DeprecationWarning)
- self.assertEqual(l.get_prefix(), "")
- l.set_prefix("hi")
- self.assertEqual(l.prefix, "hi")
- self.assertEqual(len(w), 2)
- for warning in w:
- self.assertTrue(warning.category is DeprecationWarning)
- self.assertEqual(str(w[0].message), "get_prefix() is deprecated; " \
- "use the prefix property")
- self.assertEqual(str(w[1].message), "set_prefix() is deprecated; " \
- "use the prefix property")
-
def test_instantiate_base(self):
if __debug__:
# Test that instantiating Base() raises an AssertionError
diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py
index 4191b22..e79018f 100644
--- a/Lib/logging/__init__.py
+++ b/Lib/logging/__init__.py
@@ -36,15 +36,9 @@ __all__ = ['BASIC_FORMAT', 'BufferingFormatter', 'CRITICAL', 'DEBUG', 'ERROR',
'getLogRecordFactory', 'setLogRecordFactory', 'lastResort']
try:
- import codecs
-except ImportError:
- codecs = None
-
-try:
- import _thread as thread
import threading
-except ImportError:
- thread = None
+except ImportError: #pragma: no cover
+ threading = None
__author__ = "Vinay Sajip <vinay_sajip@red-dove.com>"
__status__ = "production"
@@ -65,16 +59,16 @@ else:
_srcfile = __file__
_srcfile = os.path.normcase(_srcfile)
-# next bit filched from 1.5.2's inspect.py
-def currentframe():
- """Return the frame object for the caller's stack frame."""
- try:
- raise Exception
- except:
- return sys.exc_info()[2].tb_frame.f_back
-if hasattr(sys, '_getframe'): currentframe = lambda: sys._getframe(3)
-# done filching
+if hasattr(sys, '_getframe'):
+ currentframe = lambda: sys._getframe(3)
+else: #pragma: no cover
+ def currentframe():
+ """Return the frame object for the caller's stack frame."""
+ try:
+ raise Exception
+ except:
+ return sys.exc_info()[2].tb_frame.f_back
# _srcfile is only used in conjunction with sys._getframe().
# To provide compatibility with older versions of Python, set _srcfile
@@ -92,22 +86,22 @@ _startTime = time.time()
#raiseExceptions is used to see if exceptions during handling should be
#propagated
#
-raiseExceptions = 1
+raiseExceptions = True
#
# If you don't want threading information in the log, set this to zero
#
-logThreads = 1
+logThreads = True
#
# If you don't want multiprocessing information in the log, set this to zero
#
-logMultiprocessing = 1
+logMultiprocessing = True
#
# If you don't want process information in the log, set this to zero
#
-logProcesses = 1
+logProcesses = True
#---------------------------------------------------------------------------
# Level related stuff
@@ -197,9 +191,9 @@ def _checkLevel(level):
#the lock would already have been acquired - so we need an RLock.
#The same argument applies to Loggers and Manager.loggerDict.
#
-if thread:
+if threading:
_lock = threading.RLock()
-else:
+else: #pragma: no cover
_lock = None
@@ -252,7 +246,7 @@ class LogRecord(object):
# during formatting, we test to see if the arg is present using
# 'if self.args:'. If the event being logged is e.g. 'Value is %d'
# and if the passed arg fails 'if self.args:' then no formatting
- # is done. For example, logger.warn('Value is %d', 0) would log
+ # is done. For example, logger.warning('Value is %d', 0) would log
# 'Value is %d' instead of 'Value is 0'.
# For the use case of passing a dictionary, this should not be a
# problem.
@@ -276,13 +270,13 @@ class LogRecord(object):
self.created = ct
self.msecs = (ct - int(ct)) * 1000
self.relativeCreated = (self.created - _startTime) * 1000
- if logThreads and thread:
- self.thread = thread.get_ident()
+ if logThreads and threading:
+ self.thread = threading.get_ident()
self.threadName = threading.current_thread().name
- else:
+ else: # pragma: no cover
self.thread = None
self.threadName = None
- if not logMultiprocessing:
+ if not logMultiprocessing: # pragma: no cover
self.processName = None
else:
self.processName = 'MainProcess'
@@ -294,7 +288,7 @@ class LogRecord(object):
# for an example
try:
self.processName = mp.current_process().name
- except Exception:
+ except Exception: #pragma: no cover
pass
if logProcesses and hasattr(os, 'getpid'):
self.process = os.getpid()
@@ -466,6 +460,9 @@ class Formatter(object):
self._fmt = self._style._fmt
self.datefmt = datefmt
+ default_time_format = '%Y-%m-%d %H:%M:%S'
+ default_msec_format = '%s,%03d'
+
def formatTime(self, record, datefmt=None):
"""
Return the creation time of the specified LogRecord as formatted text.
@@ -488,8 +485,8 @@ class Formatter(object):
if datefmt:
s = time.strftime(datefmt, ct)
else:
- t = time.strftime("%Y-%m-%d %H:%M:%S", ct)
- s = "%s,%03d" % (t, record.msecs) # the use of % here is internal
+ t = time.strftime(self.default_time_format, ct)
+ s = self.default_msec_format % (t, record.msecs)
return s
def formatException(self, ei):
@@ -642,11 +639,11 @@ class Filter(object):
yes. If deemed appropriate, the record may be modified in-place.
"""
if self.nlen == 0:
- return 1
+ return True
elif self.name == record.name:
- return 1
+ return True
elif record.name.find(self.name, 0, self.nlen) != 0:
- return 0
+ return False
return (record.name[self.nlen] == ".")
class Filterer(object):
@@ -686,14 +683,14 @@ class Filterer(object):
Allow filters to be just callables.
"""
- rv = 1
+ rv = True
for f in self.filters:
if hasattr(f, 'filter'):
result = f.filter(record)
else:
result = f(record) # assume callable - will raise if not
if not result:
- rv = 0
+ rv = False
break
return rv
@@ -771,9 +768,9 @@ class Handler(Filterer):
"""
Acquire a thread lock for serializing access to the underlying I/O.
"""
- if thread:
+ if threading:
self.lock = threading.RLock()
- else:
+ else: #pragma: no cover
self.lock = None
def acquire(self):
@@ -792,7 +789,7 @@ class Handler(Filterer):
def setLevel(self, level):
"""
- Set the logging level of this handler.
+ Set the logging level of this handler. level must be an int or a str.
"""
self.level = _checkLevel(level)
@@ -888,7 +885,7 @@ class Handler(Filterer):
None, sys.stderr)
sys.stderr.write('Logged from file %s, line %s\n' % (
record.filename, record.lineno))
- except IOError:
+ except IOError: #pragma: no cover
pass # see issue 5971
finally:
del ei
@@ -941,7 +938,7 @@ class StreamHandler(Handler):
stream.write(msg)
stream.write(self.terminator)
self.flush()
- except (KeyboardInterrupt, SystemExit):
+ except (KeyboardInterrupt, SystemExit): #pragma: no cover
raise
except:
self.handleError(record)
@@ -950,14 +947,12 @@ class FileHandler(StreamHandler):
"""
A handler class which writes formatted logging records to disk files.
"""
- def __init__(self, filename, mode='a', encoding=None, delay=0):
+ def __init__(self, filename, mode='a', encoding=None, delay=False):
"""
Open the specified file and use it as the stream for logging.
"""
#keep the absolute path, otherwise derived classes which use this
#may come a cropper when the current directory changes
- if codecs is None:
- encoding = None
self.baseFilename = os.path.abspath(filename)
self.mode = mode
self.encoding = encoding
@@ -989,11 +984,7 @@ class FileHandler(StreamHandler):
Open the current base file with the (original) mode and encoding.
Return the resulting stream.
"""
- if self.encoding is None:
- stream = open(self.baseFilename, self.mode)
- else:
- stream = codecs.open(self.baseFilename, self.mode, self.encoding)
- return stream
+ return open(self.baseFilename, self.mode, encoding=self.encoding)
def emit(self, record):
"""
@@ -1205,13 +1196,13 @@ class Logger(Filterer):
self.name = name
self.level = _checkLevel(level)
self.parent = None
- self.propagate = 1
+ self.propagate = True
self.handlers = []
- self.disabled = 0
+ self.disabled = False
def setLevel(self, level):
"""
- Set the logging level of this logger.
+ Set the logging level of this logger. level must be an int or a str.
"""
self.level = _checkLevel(level)
@@ -1251,7 +1242,10 @@ class Logger(Filterer):
if self.isEnabledFor(WARNING):
self._log(WARNING, msg, args, **kwargs)
- warn = warning
+ def warn(self, msg, *args, **kwargs):
+ warnings.warn("The 'warn' method is deprecated, "
+ "use 'warning' instead", DeprecationWarning, 2)
+ self.warning(msg, *args, **kwargs)
def error(self, msg, *args, **kwargs):
"""
@@ -1360,9 +1354,9 @@ class Logger(Filterer):
#IronPython can use logging.
try:
fn, lno, func, sinfo = self.findCaller(stack_info)
- except ValueError:
+ except ValueError: # pragma: no cover
fn, lno, func = "(unknown file)", 0, "(unknown function)"
- else:
+ else: # pragma: no cover
fn, lno, func = "(unknown file)", 0, "(unknown function)"
if exc_info:
if not isinstance(exc_info, tuple):
@@ -1474,7 +1468,7 @@ class Logger(Filterer):
Is this logger enabled for level 'level'?
"""
if self.manager.disable >= level:
- return 0
+ return False
return level >= self.getEffectiveLevel()
def getChild(self, suffix):
@@ -1564,7 +1558,10 @@ class LoggerAdapter(object):
"""
self.log(WARNING, msg, *args, **kwargs)
- warn = warning
+ def warn(self, msg, *args, **kwargs):
+ warnings.warn("The 'warn' method is deprecated, "
+ "use 'warning' instead", DeprecationWarning, 2)
+ self.warning(msg, *args, **kwargs)
def error(self, msg, *args, **kwargs):
"""
@@ -1576,7 +1573,7 @@ class LoggerAdapter(object):
"""
Delegate an exception call to the underlying logger.
"""
- kwargs["exc_info"] = 1
+ kwargs["exc_info"] = True
self.log(ERROR, msg, *args, **kwargs)
def critical(self, msg, *args, **kwargs):
@@ -1659,6 +1656,10 @@ def basicConfig(**kwargs):
stream Use the specified stream to initialize the StreamHandler. Note
that this argument is incompatible with 'filename' - if both
are present, 'stream' is ignored.
+ handlers If specified, this should be an iterable of already created
+ handlers, which will be added to the root logger. Any handler
+ in the list which does not have a formatter assigned will be
+ assigned the formatter created in this function.
Note that you could specify a stream created using open(filename, mode)
rather than passing the filename and mode in. However, it should be
@@ -1666,27 +1667,47 @@ def basicConfig(**kwargs):
using sys.stdout or sys.stderr), whereas FileHandler closes its stream
when the handler is closed.
- .. versionchanged: 3.2
+ .. versionchanged:: 3.2
Added the ``style`` parameter.
+
+ .. versionchanged:: 3.3
+ Added the ``handlers`` parameter. A ``ValueError`` is now raised for
+ incompatible arguments (e.g. ``handlers`` specified together with
+ ``filename``/``filemode``, or ``filename``/``filemode`` specified
+ together with ``stream``, or ``handlers`` specified together with
+ ``stream``).
"""
# Add thread safety in case someone mistakenly calls
# basicConfig() from multiple threads
_acquireLock()
try:
if len(root.handlers) == 0:
- filename = kwargs.get("filename")
- if filename:
- mode = kwargs.get("filemode", 'a')
- hdlr = FileHandler(filename, mode)
+ handlers = kwargs.get("handlers")
+ if handlers is None:
+ if "stream" in kwargs and "filename" in kwargs:
+ raise ValueError("'stream' and 'filename' should not be "
+ "specified together")
else:
- stream = kwargs.get("stream")
- hdlr = StreamHandler(stream)
+ if "stream" in kwargs or "filename" in kwargs:
+ raise ValueError("'stream' or 'filename' should not be "
+ "specified together with 'handlers'")
+ if handlers is None:
+ filename = kwargs.get("filename")
+ if filename:
+ mode = kwargs.get("filemode", 'a')
+ h = FileHandler(filename, mode)
+ else:
+ stream = kwargs.get("stream")
+ h = StreamHandler(stream)
+ handlers = [h]
fs = kwargs.get("format", BASIC_FORMAT)
dfs = kwargs.get("datefmt", None)
style = kwargs.get("style", '%')
fmt = Formatter(fs, dfs, style)
- hdlr.setFormatter(fmt)
- root.addHandler(hdlr)
+ for h in handlers:
+ if h.formatter is None:
+ h.setFormatter(fmt)
+ root.addHandler(h)
level = kwargs.get("level")
if level is not None:
root.setLevel(level)
@@ -1750,7 +1771,10 @@ def warning(msg, *args, **kwargs):
basicConfig()
root.warning(msg, *args, **kwargs)
-warn = warning
+def warn(msg, *args, **kwargs):
+ warnings.warn("The 'warn' function is deprecated, "
+ "use 'warning' instead", DeprecationWarning, 2)
+ warning(msg, *args, **kwargs)
def info(msg, *args, **kwargs):
"""
@@ -1835,10 +1859,10 @@ class NullHandler(Handler):
package.
"""
def handle(self, record):
- pass
+ """Stub."""
def emit(self, record):
- pass
+ """Stub."""
def createLock(self):
self.lock = None
diff --git a/Lib/logging/config.py b/Lib/logging/config.py
index 373da2b..5ef5c91 100644
--- a/Lib/logging/config.py
+++ b/Lib/logging/config.py
@@ -24,13 +24,13 @@ Copyright (C) 2001-2010 Vinay Sajip. All Rights Reserved.
To use, simply 'import logging' and log away!
"""
-import sys, logging, logging.handlers, socket, struct, os, traceback, re
-import types, io
+import sys, logging, logging.handlers, socket, struct, traceback, re
+import io
try:
import _thread as thread
import threading
-except ImportError:
+except ImportError: #pragma: no cover
thread = None
from socketserver import ThreadingTCPServer, StreamRequestHandler
@@ -98,9 +98,6 @@ def _resolve(name):
def _strip_spaces(alist):
return map(lambda x: x.strip(), alist)
-def _encoded(s):
- return s if isinstance(s, str) else s.encode('utf-8')
-
def _create_formatters(cp):
"""Create and return formatters"""
flist = cp["formatters"]["keys"]
@@ -215,7 +212,7 @@ def _install_loggers(cp, handlers, disable_existing):
#avoid disabling child loggers of explicitly
#named loggers. With a sorted list it is easier
#to find the child loggers.
- existing.sort(key=_encoded)
+ existing.sort()
#We'll keep the list of existing loggers
#which are children of named loggers here...
child_loggers = []
@@ -588,7 +585,7 @@ class DictConfigurator(BaseConfigurator):
#avoid disabling child loggers of explicitly
#named loggers. With a sorted list it is easier
#to find the child loggers.
- existing.sort(key=_encoded)
+ existing.sort()
#We'll keep the list of existing loggers
#which are children of named loggers here...
child_loggers = []
@@ -786,7 +783,7 @@ def listen(port=DEFAULT_LOGGING_CONFIG_PORT):
and which you can join() when appropriate. To stop the server, call
stopListening().
"""
- if not thread:
+ if not thread: #pragma: no cover
raise NotImplementedError("listen() needs threading to work")
class ConfigStreamHandler(StreamRequestHandler):
@@ -804,7 +801,6 @@ def listen(port=DEFAULT_LOGGING_CONFIG_PORT):
struct.pack(">L", n), followed by the config file.
Uses fileConfig() to do the grunt work.
"""
- import tempfile
try:
conn = self.connection
chunk = conn.recv(4)
@@ -825,7 +821,7 @@ def listen(port=DEFAULT_LOGGING_CONFIG_PORT):
file = io.StringIO(chunk)
try:
fileConfig(file)
- except (KeyboardInterrupt, SystemExit):
+ except (KeyboardInterrupt, SystemExit): #pragma: no cover
raise
except:
traceback.print_exc()
diff --git a/Lib/logging/handlers.py b/Lib/logging/handlers.py
index 8349d3a..f286cd6 100644
--- a/Lib/logging/handlers.py
+++ b/Lib/logging/handlers.py
@@ -24,18 +24,14 @@ To use, simply 'import logging.handlers' and log away!
"""
import errno, logging, socket, os, pickle, struct, time, re
+from codecs import BOM_UTF8
from stat import ST_DEV, ST_INO, ST_MTIME
import queue
try:
import threading
-except ImportError:
+except ImportError: #pragma: no cover
threading = None
-try:
- import codecs
-except ImportError:
- codecs = None
-
#
# Some constants...
#
@@ -55,15 +51,15 @@ class BaseRotatingHandler(logging.FileHandler):
Not meant to be instantiated directly. Instead, use RotatingFileHandler
or TimedRotatingFileHandler.
"""
- def __init__(self, filename, mode, encoding=None, delay=0):
+ def __init__(self, filename, mode, encoding=None, delay=False):
"""
Use the specified filename for streamed logging
"""
- if codecs is None:
- encoding = None
logging.FileHandler.__init__(self, filename, mode, encoding, delay)
self.mode = mode
self.encoding = encoding
+ self.namer = None
+ self.rotator = None
def emit(self, record):
"""
@@ -76,17 +72,55 @@ class BaseRotatingHandler(logging.FileHandler):
if self.shouldRollover(record):
self.doRollover()
logging.FileHandler.emit(self, record)
- except (KeyboardInterrupt, SystemExit):
+ except (KeyboardInterrupt, SystemExit): #pragma: no cover
raise
except:
self.handleError(record)
+ def rotation_filename(self, default_name):
+ """
+ Modify the filename of a log file when rotating.
+
+ This hook exists so that a custom filename can be supplied.
+
+ The default implementation calls the 'namer' attribute of the
+ handler, if it's callable, passing the default name to
+ it. If the attribute isn't callable (the default is None), the name
+ is returned unchanged.
+
+ :param default_name: The default name for the log file.
+ """
+ if not callable(self.namer):
+ result = default_name
+ else:
+ result = self.namer(default_name)
+ return result
+
+ def rotate(self, source, dest):
+ """
+ When rotating, rotate the current log.
+
+ The default implementation calls the 'rotator' attribute of the
+ handler, if it's callable, passing the source and dest arguments to
+ it. If the attribute isn't callable (the default is None), the source
+ is simply renamed to the destination.
+
+ :param source: The source filename. This is normally the base
+ filename, e.g. 'test.log'
+ :param dest: The destination filename. This is normally
+ what the source is rotated to, e.g. 'test.log.1'.
+ """
+ if not callable(self.rotator):
+ os.rename(source, dest)
+ else:
+ self.rotator(source, dest)
+
class RotatingFileHandler(BaseRotatingHandler):
"""
Handler for logging to a set of files, which switches from one file
to the next when the current file reaches a certain size.
"""
- def __init__(self, filename, mode='a', maxBytes=0, backupCount=0, encoding=None, delay=0):
+ def __init__(self, filename, mode='a', maxBytes=0, backupCount=0, encoding=None, delay=False):
"""
Open the specified file and use it as the stream for logging.
@@ -127,16 +161,17 @@ class RotatingFileHandler(BaseRotatingHandler):
self.stream = None
if self.backupCount > 0:
for i in range(self.backupCount - 1, 0, -1):
- sfn = "%s.%d" % (self.baseFilename, i)
- dfn = "%s.%d" % (self.baseFilename, i + 1)
+ sfn = self.rotation_filename("%s.%d" % (self.baseFilename, i))
+ dfn = self.rotation_filename("%s.%d" % (self.baseFilename,
+ i + 1))
if os.path.exists(sfn):
if os.path.exists(dfn):
os.remove(dfn)
os.rename(sfn, dfn)
- dfn = self.baseFilename + ".1"
+ dfn = self.rotation_filename(self.baseFilename + ".1")
if os.path.exists(dfn):
os.remove(dfn)
- os.rename(self.baseFilename, dfn)
+ self.rotate(self.baseFilename, dfn)
self.stream = self._open()
def shouldRollover(self, record):
@@ -183,19 +218,19 @@ class TimedRotatingFileHandler(BaseRotatingHandler):
if self.when == 'S':
self.interval = 1 # one second
self.suffix = "%Y-%m-%d_%H-%M-%S"
- self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}$"
+ self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}(\.\w+)?$"
elif self.when == 'M':
self.interval = 60 # one minute
self.suffix = "%Y-%m-%d_%H-%M"
- self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}$"
+ self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}(\.\w+)?$"
elif self.when == 'H':
self.interval = 60 * 60 # one hour
self.suffix = "%Y-%m-%d_%H"
- self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}$"
+ self.extMatch = r"^\d{4}-\d{2}-\d{2}_\d{2}(\.\w+)?$"
elif self.when == 'D' or self.when == 'MIDNIGHT':
self.interval = 60 * 60 * 24 # one day
self.suffix = "%Y-%m-%d"
- self.extMatch = r"^\d{4}-\d{2}-\d{2}$"
+ self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$"
elif self.when.startswith('W'):
self.interval = 60 * 60 * 24 * 7 # one week
if len(self.when) != 2:
@@ -204,7 +239,7 @@ class TimedRotatingFileHandler(BaseRotatingHandler):
raise ValueError("Invalid day specified for weekly rollover: %s" % self.when)
self.dayOfWeek = int(self.when[1])
self.suffix = "%Y-%m-%d"
- self.extMatch = r"^\d{4}-\d{2}-\d{2}$"
+ self.extMatch = r"^\d{4}-\d{2}-\d{2}(\.\w+)?$"
else:
raise ValueError("Invalid rollover interval specified: %s" % self.when)
@@ -337,10 +372,11 @@ class TimedRotatingFileHandler(BaseRotatingHandler):
else:
addend = -3600
timeTuple = time.localtime(t + addend)
- dfn = self.baseFilename + "." + time.strftime(self.suffix, timeTuple)
+ dfn = self.rotation_filename(self.baseFilename + "." +
+ time.strftime(self.suffix, timeTuple))
if os.path.exists(dfn):
os.remove(dfn)
- os.rename(self.baseFilename, dfn)
+ self.rotate(self.baseFilename, dfn)
if self.backupCount > 0:
for s in self.getFilesToDelete():
os.remove(s)
@@ -379,7 +415,7 @@ class WatchedFileHandler(logging.FileHandler):
This handler is based on a suggestion and patch by Chad J.
Schroeder.
"""
- def __init__(self, filename, mode='a', encoding=None, delay=0):
+ def __init__(self, filename, mode='a', encoding=None, delay=False):
logging.FileHandler.__init__(self, filename, mode, encoding, delay)
self.dev, self.ino = -1, -1
self._statstream()
@@ -438,15 +474,15 @@ class SocketHandler(logging.Handler):
"""
Initializes the handler with a specific host address and port.
- The attribute 'closeOnError' is set to 1 - which means that if
- a socket error occurs, the socket is silently closed and then
- reopened on the next logging call.
+ When the attribute *closeOnError* is set to True, a socket that
+ encounters an error is silently closed and then reopened on the next
+ logging call.
"""
logging.Handler.__init__(self)
self.host = host
self.port = port
self.sock = None
- self.closeOnError = 0
+ self.closeOnError = False
self.retryTime = None
#
# Exponential backoff parameters.
@@ -463,8 +499,12 @@ class SocketHandler(logging.Handler):
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
if hasattr(s, 'settimeout'):
s.settimeout(timeout)
- s.connect((self.host, self.port))
- return s
+ try:
+ s.connect((self.host, self.port))
+ return s
+ except socket.error:
+ s.close()
+ raise
def createSocket(self):
"""
@@ -477,7 +517,7 @@ class SocketHandler(logging.Handler):
# is the first time back after a disconnect, or
# we've waited long enough.
if self.retryTime is None:
- attempt = 1
+ attempt = True
else:
attempt = (now >= self.retryTime)
if attempt:
@@ -510,14 +550,14 @@ class SocketHandler(logging.Handler):
try:
if hasattr(self.sock, "sendall"):
self.sock.sendall(s)
- else:
+ else: #pragma: no cover
sentsofar = 0
left = len(s)
while left > 0:
sent = self.sock.send(s[sentsofar:])
sentsofar = sentsofar + sent
left = left - sent
- except socket.error:
+ except socket.error: #pragma: no cover
self.sock.close()
self.sock = None # so we can call createSocket next time
@@ -567,7 +607,7 @@ class SocketHandler(logging.Handler):
try:
s = self.makePickle(record)
self.send(s)
- except (KeyboardInterrupt, SystemExit):
+ except (KeyboardInterrupt, SystemExit): #pragma: no cover
raise
except:
self.handleError(record)
@@ -601,7 +641,7 @@ class DatagramHandler(SocketHandler):
Initializes the handler with a specific host address and port.
"""
SocketHandler.__init__(self, host, port)
- self.closeOnError = 0
+ self.closeOnError = False
def makeSocket(self):
"""
@@ -742,10 +782,10 @@ class SysLogHandler(logging.Handler):
self.socktype = socktype
if isinstance(address, str):
- self.unixsocket = 1
+ self.unixsocket = True
self._connect_unixsocket(address)
else:
- self.unixsocket = 0
+ self.unixsocket = False
self.socket = socket.socket(socket.AF_INET, socktype)
if socktype == socket.SOCK_STREAM:
self.socket.connect(address)
@@ -778,8 +818,7 @@ class SysLogHandler(logging.Handler):
"""
self.acquire()
try:
- if self.unixsocket:
- self.socket.close()
+ self.socket.close()
logging.Handler.close(self)
finally:
self.release()
@@ -794,6 +833,7 @@ class SysLogHandler(logging.Handler):
"""
return self.priority_map.get(levelName, "warning")
+ ident = '' # prepended to all messages
append_nul = True # some old syslog daemons expect a NUL terminator
def emit(self, record):
@@ -804,6 +844,8 @@ class SysLogHandler(logging.Handler):
exception information is present, it is NOT sent to the server.
"""
msg = self.format(record)
+ if self.ident:
+ msg = self.ident + msg
if self.append_nul:
msg += '\000'
"""
@@ -827,7 +869,7 @@ class SysLogHandler(logging.Handler):
self.socket.sendto(msg, self.address)
else:
self.socket.sendall(msg)
- except (KeyboardInterrupt, SystemExit):
+ except (KeyboardInterrupt, SystemExit): #pragma: no cover
raise
except:
self.handleError(record)
@@ -837,7 +879,7 @@ class SMTPHandler(logging.Handler):
A handler class which sends an SMTP email for each logging event.
"""
def __init__(self, mailhost, fromaddr, toaddrs, subject,
- credentials=None, secure=None):
+ credentials=None, secure=None, timeout=5.0):
"""
Initialize the handler.
@@ -851,6 +893,8 @@ class SMTPHandler(logging.Handler):
will be either an empty tuple, or a single-value tuple with the name
of a keyfile, or a 2-value tuple with the names of the keyfile and
certificate file. (This tuple is passed to the `starttls` method).
+ A timeout in seconds can be specified for the SMTP connection (the
+ default is 5 seconds).
"""
logging.Handler.__init__(self)
if isinstance(mailhost, tuple):
@@ -867,7 +911,7 @@ class SMTPHandler(logging.Handler):
self.toaddrs = toaddrs
self.subject = subject
self.secure = secure
- self._timeout = 5.0
+ self.timeout = timeout
def getSubject(self, record):
"""
@@ -890,7 +934,7 @@ class SMTPHandler(logging.Handler):
port = self.mailport
if not port:
port = smtplib.SMTP_PORT
- smtp = smtplib.SMTP(self.mailhost, port, timeout=self._timeout)
+ smtp = smtplib.SMTP(self.mailhost, port, timeout=self.timeout)
msg = self.format(record)
msg = "From: %s\r\nTo: %s\r\nSubject: %s\r\nDate: %s\r\n\r\n%s" % (
self.fromaddr,
@@ -905,7 +949,7 @@ class SMTPHandler(logging.Handler):
smtp.login(self.username, self.password)
smtp.sendmail(self.fromaddr, self.toaddrs, msg)
smtp.quit()
- except (KeyboardInterrupt, SystemExit):
+ except (KeyboardInterrupt, SystemExit): #pragma: no cover
raise
except:
self.handleError(record)
@@ -992,7 +1036,7 @@ class NTEventLogHandler(logging.Handler):
type = self.getEventType(record)
msg = self.format(record)
self._welu.ReportEvent(self.appname, id, cat, type, [msg])
- except (KeyboardInterrupt, SystemExit):
+ except (KeyboardInterrupt, SystemExit): #pragma: no cover
raise
except:
self.handleError(record)
@@ -1075,9 +1119,11 @@ class HTTPHandler(logging.Handler):
s = ('u%s:%s' % self.credentials).encode('utf-8')
s = 'Basic ' + base64.b64encode(s).strip()
h.putheader('Authorization', s)
- h.endheaders(data if self.method == "POST" else None)
+ h.endheaders()
+ if self.method == "POST":
+ h.send(data.encode('utf-8'))
h.getresponse() #can't do anything with the result
- except (KeyboardInterrupt, SystemExit):
+ except (KeyboardInterrupt, SystemExit): #pragma: no cover
raise
except:
self.handleError(record)
@@ -1259,7 +1305,7 @@ class QueueHandler(logging.Handler):
"""
try:
self.enqueue(self.prepare(record))
- except (KeyboardInterrupt, SystemExit):
+ except (KeyboardInterrupt, SystemExit): #pragma: no cover
raise
except:
self.handleError(record)
@@ -1356,6 +1402,16 @@ if threading:
except queue.Empty:
break
+ def enqueue_sentinel(self):
+ """
+ This is used to enqueue the sentinel record.
+
+ The base implementation uses put_nowait. You may want to override this
+ method if you want to use timeouts or work with custom queue
+ implementations.
+ """
+ self.queue.put_nowait(self._sentinel)
+
def stop(self):
"""
Stop the listener.
@@ -1365,6 +1421,6 @@ if threading:
may be some records still left on the queue, which won't be processed.
"""
self._stop.set()
- self.queue.put_nowait(self._sentinel)
+ self.enqueue_sentinel()
self._thread.join()
self._thread = None
diff --git a/Lib/lzma.py b/Lib/lzma.py
new file mode 100644
index 0000000..1a1b065
--- /dev/null
+++ b/Lib/lzma.py
@@ -0,0 +1,454 @@
+"""Interface to the liblzma compression library.
+
+This module provides a class for reading and writing compressed files,
+classes for incremental (de)compression, and convenience functions for
+one-shot (de)compression.
+
+These classes and functions support both the XZ and legacy LZMA
+container formats, as well as raw compressed data streams.
+"""
+
+__all__ = [
+ "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256",
+ "CHECK_ID_MAX", "CHECK_UNKNOWN",
+ "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64",
+ "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC",
+ "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW",
+ "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4",
+ "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",
+
+ "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
+ "open", "compress", "decompress", "is_check_supported",
+]
+
+import builtins
+import io
+from _lzma import *
+from _lzma import _encode_filter_properties, _decode_filter_properties
+
+
+_MODE_CLOSED = 0
+_MODE_READ = 1
+_MODE_READ_EOF = 2
+_MODE_WRITE = 3
+
+_BUFFER_SIZE = 8192
+
+
+class LZMAFile(io.BufferedIOBase):
+
+ """A file object providing transparent LZMA (de)compression.
+
+ An LZMAFile can act as a wrapper for an existing file object, or
+ refer directly to a named file on disk.
+
+ Note that LZMAFile provides a *binary* file interface - data read
+ is returned as bytes, and data to be written must be given as bytes.
+ """
+
+ def __init__(self, filename=None, mode="r", *,
+ format=None, check=-1, preset=None, filters=None):
+ """Open an LZMA-compressed file in binary mode.
+
+ filename can be either an actual file name (given as a str or
+ bytes object), in which case the named file is opened, or it can
+ be an existing file object to read from or write to.
+
+ mode can be "r" for reading (default), "w" for (over)writing, or
+ "a" for appending. These can equivalently be given as "rb", "wb",
+ and "ab" respectively.
+
+ format specifies the container format to use for the file.
+ If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the
+ default is FORMAT_XZ.
+
+ check specifies the integrity check to use. This argument can
+ only be used when opening a file for writing. For FORMAT_XZ,
+ the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not
+ support integrity checks - for these formats, check must be
+ omitted, or be CHECK_NONE.
+
+ When opening a file for reading, the *preset* argument is not
+ meaningful, and should be omitted. The *filters* argument should
+ also be omitted, except when format is FORMAT_RAW (in which case
+ it is required).
+
+ When opening a file for writing, the settings used by the
+ compressor can be specified either as a preset compression
+ level (with the *preset* argument), or in detail as a custom
+ filter chain (with the *filters* argument). For FORMAT_XZ and
+ FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
+ level. For FORMAT_RAW, the caller must always specify a filter
+ chain; the raw compressor does not support preset compression
+ levels.
+
+ preset (if provided) should be an integer in the range 0-9,
+ optionally OR-ed with the constant PRESET_EXTREME.
+
+ filters (if provided) should be a sequence of dicts. Each dict
+ should have an entry for "id" indicating the ID of the filter, plus
+ additional entries for options to the filter.
+ """
+ self._fp = None
+ self._closefp = False
+ self._mode = _MODE_CLOSED
+ self._pos = 0
+ self._size = -1
+
+ if mode in ("r", "rb"):
+ if check != -1:
+ raise ValueError("Cannot specify an integrity check "
+ "when opening a file for reading")
+ if preset is not None:
+ raise ValueError("Cannot specify a preset compression "
+ "level when opening a file for reading")
+ if format is None:
+ format = FORMAT_AUTO
+ mode_code = _MODE_READ
+ # Save the args to pass to the LZMADecompressor initializer.
+ # If the file contains multiple compressed streams, each
+ # stream will need a separate decompressor object.
+ self._init_args = {"format":format, "filters":filters}
+ self._decompressor = LZMADecompressor(**self._init_args)
+ self._buffer = None
+ elif mode in ("w", "wb", "a", "ab"):
+ if format is None:
+ format = FORMAT_XZ
+ mode_code = _MODE_WRITE
+ self._compressor = LZMACompressor(format=format, check=check,
+ preset=preset, filters=filters)
+ else:
+ raise ValueError("Invalid mode: {!r}".format(mode))
+
+ if isinstance(filename, (str, bytes)):
+ if "b" not in mode:
+ mode += "b"
+ self._fp = builtins.open(filename, mode)
+ self._closefp = True
+ self._mode = mode_code
+ elif hasattr(filename, "read") or hasattr(filename, "write"):
+ self._fp = filename
+ self._mode = mode_code
+ else:
+ raise TypeError("filename must be a str or bytes object, or a file")
+
+ def close(self):
+ """Flush and close the file.
+
+ May be called more than once without error. Once the file is
+ closed, any other operation on it will raise a ValueError.
+ """
+ if self._mode == _MODE_CLOSED:
+ return
+ try:
+ if self._mode in (_MODE_READ, _MODE_READ_EOF):
+ self._decompressor = None
+ self._buffer = None
+ elif self._mode == _MODE_WRITE:
+ self._fp.write(self._compressor.flush())
+ self._compressor = None
+ finally:
+ try:
+ if self._closefp:
+ self._fp.close()
+ finally:
+ self._fp = None
+ self._closefp = False
+ self._mode = _MODE_CLOSED
+
+ @property
+ def closed(self):
+ """True if this file is closed."""
+ return self._mode == _MODE_CLOSED
+
+ def fileno(self):
+ """Return the file descriptor for the underlying file."""
+ self._check_not_closed()
+ return self._fp.fileno()
+
+ def seekable(self):
+ """Return whether the file supports seeking."""
+ return self.readable() and self._fp.seekable()
+
+ def readable(self):
+ """Return whether the file was opened for reading."""
+ self._check_not_closed()
+ return self._mode in (_MODE_READ, _MODE_READ_EOF)
+
+ def writable(self):
+ """Return whether the file was opened for writing."""
+ self._check_not_closed()
+ return self._mode == _MODE_WRITE
+
+ # Mode-checking helper functions.
+
+ def _check_not_closed(self):
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+
+ def _check_can_read(self):
+ if not self.readable():
+ raise io.UnsupportedOperation("File not open for reading")
+
+ def _check_can_write(self):
+ if not self.writable():
+ raise io.UnsupportedOperation("File not open for writing")
+
+ def _check_can_seek(self):
+ if not self.readable():
+ raise io.UnsupportedOperation("Seeking is only supported "
+ "on files open for reading")
+ if not self._fp.seekable():
+ raise io.UnsupportedOperation("The underlying file object "
+ "does not support seeking")
+
+ # Fill the readahead buffer if it is empty. Returns False on EOF.
+ def _fill_buffer(self):
+ # Depending on the input data, our call to the decompressor may not
+ # return any data. In this case, try again after reading another block.
+ while True:
+ if self._buffer:
+ return True
+
+ if self._decompressor.unused_data:
+ rawblock = self._decompressor.unused_data
+ else:
+ rawblock = self._fp.read(_BUFFER_SIZE)
+
+ if not rawblock:
+ if self._decompressor.eof:
+ self._mode = _MODE_READ_EOF
+ self._size = self._pos
+ return False
+ else:
+ raise EOFError("Compressed file ended before the "
+ "end-of-stream marker was reached")
+
+ # Continue to next stream.
+ if self._decompressor.eof:
+ self._decompressor = LZMADecompressor(**self._init_args)
+
+ self._buffer = self._decompressor.decompress(rawblock)
+
+ # Read data until EOF.
+ # If return_data is false, consume the data without returning it.
+ def _read_all(self, return_data=True):
+ blocks = []
+ while self._fill_buffer():
+ if return_data:
+ blocks.append(self._buffer)
+ self._pos += len(self._buffer)
+ self._buffer = None
+ if return_data:
+ return b"".join(blocks)
+
+ # Read a block of up to n bytes.
+ # If return_data is false, consume the data without returning it.
+ def _read_block(self, n, return_data=True):
+ blocks = []
+ while n > 0 and self._fill_buffer():
+ if n < len(self._buffer):
+ data = self._buffer[:n]
+ self._buffer = self._buffer[n:]
+ else:
+ data = self._buffer
+ self._buffer = None
+ if return_data:
+ blocks.append(data)
+ self._pos += len(data)
+ n -= len(data)
+ if return_data:
+ return b"".join(blocks)
+
+ def peek(self, size=-1):
+ """Return buffered data without advancing the file position.
+
+ Always returns at least one byte of data, unless at EOF.
+ The exact number of bytes returned is unspecified.
+ """
+ self._check_can_read()
+ if self._mode == _MODE_READ_EOF or not self._fill_buffer():
+ return b""
+ return self._buffer
+
+ def read(self, size=-1):
+ """Read up to size uncompressed bytes from the file.
+
+ If size is negative or omitted, read until EOF is reached.
+ Returns b"" if the file is already at EOF.
+ """
+ self._check_can_read()
+ if self._mode == _MODE_READ_EOF or size == 0:
+ return b""
+ elif size < 0:
+ return self._read_all()
+ else:
+ return self._read_block(size)
+
+ def read1(self, size=-1):
+ """Read up to size uncompressed bytes, while trying to avoid
+ making multiple reads from the underlying stream.
+
+ Returns b"" if the file is at EOF.
+ """
+ # Usually, read1() calls _fp.read() at most once. However, sometimes
+ # this does not give enough data for the decompressor to make progress.
+ # In this case we make multiple reads, to avoid returning b"".
+ self._check_can_read()
+ if (size == 0 or self._mode == _MODE_READ_EOF or
+ not self._fill_buffer()):
+ return b""
+ if 0 < size < len(self._buffer):
+ data = self._buffer[:size]
+ self._buffer = self._buffer[size:]
+ else:
+ data = self._buffer
+ self._buffer = None
+ self._pos += len(data)
+ return data
+
+ def write(self, data):
+ """Write a bytes object to the file.
+
+ Returns the number of uncompressed bytes written, which is
+ always len(data). Note that due to buffering, the file on disk
+ may not reflect the data written until close() is called.
+ """
+ self._check_can_write()
+ compressed = self._compressor.compress(data)
+ self._fp.write(compressed)
+ self._pos += len(data)
+ return len(data)
+
+ # Rewind the file to the beginning of the data stream.
+ def _rewind(self):
+ self._fp.seek(0, 0)
+ self._mode = _MODE_READ
+ self._pos = 0
+ self._decompressor = LZMADecompressor(**self._init_args)
+ self._buffer = None
+
+ def seek(self, offset, whence=0):
+ """Change the file position.
+
+ The new position is specified by offset, relative to the
+ position indicated by whence. Possible values for whence are:
+
+ 0: start of stream (default): offset must not be negative
+ 1: current stream position
+ 2: end of stream; offset must not be positive
+
+ Returns the new file position.
+
+ Note that seeking is emulated, so depending on the parameters,
+ this operation may be extremely slow.
+ """
+ self._check_can_seek()
+
+ # Recalculate offset as an absolute file position.
+ if whence == 0:
+ pass
+ elif whence == 1:
+ offset = self._pos + offset
+ elif whence == 2:
+ # Seeking relative to EOF - we need to know the file's size.
+ if self._size < 0:
+ self._read_all(return_data=False)
+ offset = self._size + offset
+ else:
+ raise ValueError("Invalid value for whence: {}".format(whence))
+
+ # Make it so that offset is the number of bytes to skip forward.
+ if offset < self._pos:
+ self._rewind()
+ else:
+ offset -= self._pos
+
+ # Read and discard data until we reach the desired position.
+ if self._mode != _MODE_READ_EOF:
+ self._read_block(offset, return_data=False)
+
+ return self._pos
+
+ def tell(self):
+ """Return the current file position."""
+ self._check_not_closed()
+ return self._pos
+
+
+def open(filename, mode="rb", *,
+ format=None, check=-1, preset=None, filters=None,
+ encoding=None, errors=None, newline=None):
+ """Open an LZMA-compressed file in binary or text mode.
+
+ filename can be either an actual file name (given as a str or bytes object),
+ in which case the named file is opened, or it can be an existing file object
+ to read from or write to.
+
+ The mode argument can be "r", "rb" (default), "w", "wb", "a", or "ab" for
+ binary mode, or "rt", "wt" or "at" for text mode.
+
+ The format, check, preset and filters arguments specify the compression
+ settings, as for LZMACompressor, LZMADecompressor and LZMAFile.
+
+ For binary mode, this function is equivalent to the LZMAFile constructor:
+ LZMAFile(filename, mode, ...). In this case, the encoding, errors and
+ newline arguments must not be provided.
+
+ For text mode, an LZMAFile object is created, and wrapped in an
+ io.TextIOWrapper instance with the specified encoding, error handling
+ behavior, and line ending(s).
+
+ """
+ if "t" in mode:
+ if "b" in mode:
+ raise ValueError("Invalid mode: %r" % (mode,))
+ else:
+ if encoding is not None:
+ raise ValueError("Argument 'encoding' not supported in binary mode")
+ if errors is not None:
+ raise ValueError("Argument 'errors' not supported in binary mode")
+ if newline is not None:
+ raise ValueError("Argument 'newline' not supported in binary mode")
+
+ lz_mode = mode.replace("t", "")
+ binary_file = LZMAFile(filename, lz_mode, format=format, check=check,
+ preset=preset, filters=filters)
+
+ if "t" in mode:
+ return io.TextIOWrapper(binary_file, encoding, errors, newline)
+ else:
+ return binary_file
+
+
+def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
+ """Compress a block of data.
+
+ Refer to LZMACompressor's docstring for a description of the
+ optional arguments *format*, *check*, *preset* and *filters*.
+
+ For incremental compression, use an LZMACompressor object instead.
+ """
+ comp = LZMACompressor(format, check, preset, filters)
+ return comp.compress(data) + comp.flush()
+
+
+def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):
+ """Decompress a block of data.
+
+ Refer to LZMADecompressor's docstring for a description of the
+ optional arguments *format*, *memlimit* and *filters*.
+
+ For incremental decompression, use an LZMADecompressor object instead.
+ """
+ results = []
+ while True:
+ decomp = LZMADecompressor(format, memlimit, filters)
+ results.append(decomp.decompress(data))
+ if not decomp.eof:
+ raise LZMAError("Compressed data ended before the "
+ "end-of-stream marker was reached")
+ if not decomp.unused_data:
+ return b"".join(results)
+ # There is unused data left over. Proceed to next stream.
+ data = decomp.unused_data
diff --git a/Lib/mailbox.py b/Lib/mailbox.py
index c73fb95..d3bf3fd 100644
--- a/Lib/mailbox.py
+++ b/Lib/mailbox.py
@@ -1157,8 +1157,7 @@ class MH(Mailbox):
def get_sequences(self):
"""Return a name-to-key-list dictionary to define each sequence."""
results = {}
- f = open(os.path.join(self._path, '.mh_sequences'), 'r')
- try:
+ with open(os.path.join(self._path, '.mh_sequences'), 'r', encoding='ASCII') as f:
all_keys = set(self.keys())
for line in f:
try:
@@ -1177,13 +1176,11 @@ class MH(Mailbox):
except ValueError:
raise FormatError('Invalid sequence specification: %s' %
line.rstrip())
- finally:
- f.close()
return results
def set_sequences(self, sequences):
"""Set sequences using the given name-to-key-list dictionary."""
- f = open(os.path.join(self._path, '.mh_sequences'), 'r+')
+ f = open(os.path.join(self._path, '.mh_sequences'), 'r+', encoding='ASCII')
try:
os.close(os.open(f.name, os.O_WRONLY | os.O_TRUNC))
for name, keys in sequences.items():
@@ -1523,9 +1520,10 @@ class Message(email.message.Message):
def _become_message(self, message):
"""Assume the non-format-specific state of message."""
- for name in ('_headers', '_unixfrom', '_payload', '_charset',
- 'preamble', 'epilogue', 'defects', '_default_type'):
- self.__dict__[name] = message.__dict__[name]
+ type_specific = getattr(message, '_type_specific_attributes', [])
+ for name in message.__dict__:
+ if name not in type_specific:
+ self.__dict__[name] = message.__dict__[name]
def _explain_to(self, message):
"""Copy format-specific state to message insofar as possible."""
@@ -1538,6 +1536,8 @@ class Message(email.message.Message):
class MaildirMessage(Message):
"""Message with Maildir-specific properties."""
+ _type_specific_attributes = ['_subdir', '_info', '_date']
+
def __init__(self, message=None):
"""Initialize a MaildirMessage instance."""
self._subdir = 'new'
@@ -1645,6 +1645,8 @@ class MaildirMessage(Message):
class _mboxMMDFMessage(Message):
"""Message with mbox- or MMDF-specific properties."""
+ _type_specific_attributes = ['_from']
+
def __init__(self, message=None):
"""Initialize an mboxMMDFMessage instance."""
self.set_from('MAILER-DAEMON', True)
@@ -1760,6 +1762,8 @@ class mboxMessage(_mboxMMDFMessage):
class MHMessage(Message):
"""Message with MH-specific properties."""
+ _type_specific_attributes = ['_sequences']
+
def __init__(self, message=None):
"""Initialize an MHMessage instance."""
self._sequences = []
@@ -1830,6 +1834,8 @@ class MHMessage(Message):
class BabylMessage(Message):
"""Message with Babyl-specific properties."""
+ _type_specific_attributes = ['_labels', '_visible']
+
def __init__(self, message=None):
"""Initialize an BabylMessage instance."""
self._labels = []
diff --git a/Lib/mailcap.py b/Lib/mailcap.py
index 4ae13d7..99f4958 100644
--- a/Lib/mailcap.py
+++ b/Lib/mailcap.py
@@ -33,10 +33,10 @@ def getcaps():
def listmailcapfiles():
"""Return a list of all mailcap files found on the system."""
- # XXX Actually, this is Unix-specific
+ # This is mostly a Unix thing, but we use the OS path separator anyway
if 'MAILCAPS' in os.environ:
- str = os.environ['MAILCAPS']
- mailcaps = str.split(':')
+ pathstr = os.environ['MAILCAPS']
+ mailcaps = pathstr.split(os.pathsep)
else:
if 'HOME' in os.environ:
home = os.environ['HOME']
diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py
index 8ce02f9..3f0bd0e 100644
--- a/Lib/mimetypes.py
+++ b/Lib/mimetypes.py
@@ -249,7 +249,6 @@ class MimeTypes:
yield ctype
i += 1
- default_encoding = sys.getdefaultencoding()
with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT,
r'MIME\Database\Content Type') as mimedb:
for ctype in enum_types(mimedb):
@@ -434,6 +433,8 @@ def _default_mime_types():
'.ksh' : 'text/plain',
'.latex' : 'application/x-latex',
'.m1v' : 'video/mpeg',
+ '.m3u' : 'application/vnd.apple.mpegurl',
+ '.m3u8' : 'application/vnd.apple.mpegurl',
'.man' : 'application/x-troff-man',
'.me' : 'application/x-troff-me',
'.mht' : 'message/rfc822',
diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py
index f033ba9..683e305 100644
--- a/Lib/modulefinder.py
+++ b/Lib/modulefinder.py
@@ -1,6 +1,5 @@
"""Find modules used by a script, using introspection."""
-from __future__ import generators
import dis
import imp
import marshal
@@ -9,8 +8,6 @@ import sys
import types
import struct
-READ_MODE = "rU"
-
# XXX Clean up once str8's cstor matches bytes.
LOAD_CONST = bytes([dis.opname.index('LOAD_CONST')])
IMPORT_NAME = bytes([dis.opname.index('IMPORT_NAME')])
@@ -29,9 +26,7 @@ packagePathMap = {}
# A Public interface
def AddPackagePath(packagename, path):
- paths = packagePathMap.get(packagename, [])
- paths.append(path)
- packagePathMap[packagename] = paths
+ packagePathMap.setdefault(packagename, []).append(path)
replacePackageMap = {}
@@ -106,14 +101,14 @@ class ModuleFinder:
def run_script(self, pathname):
self.msg(2, "run_script", pathname)
- with open(pathname, READ_MODE) as fp:
+ with open(pathname) as fp:
stuff = ("", "r", imp.PY_SOURCE)
self.load_module('__main__', fp, pathname, stuff)
def load_file(self, pathname):
dir, name = os.path.split(pathname)
name, ext = os.path.splitext(name)
- with open(pathname, READ_MODE) as fp:
+ with open(pathname) as fp:
stuff = (ext, "r", imp.PY_SOURCE)
self.load_module(name, fp, pathname, stuff)
@@ -227,8 +222,9 @@ class ModuleFinder:
# But we must also collect Python extension modules - although
# we cannot separate normal dlls from Python extensions.
suffixes = []
- for triple in imp.get_suffixes():
- suffixes.append(triple[0])
+ suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
+ suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
+ suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
for dir in m.__path__:
try:
names = os.listdir(dir)
@@ -270,7 +266,8 @@ class ModuleFinder:
try:
m = self.load_module(fqname, fp, pathname, stuff)
finally:
- if fp: fp.close()
+ if fp:
+ fp.close()
if parent:
setattr(parent, partname, m)
self.msgout(3, "import_module ->", m)
@@ -662,4 +659,4 @@ if __name__ == '__main__':
try:
mf = test()
except KeyboardInterrupt:
- print("\n[interrupt]")
+ print("\n[interrupted]")
diff --git a/Lib/multiprocessing/__init__.py b/Lib/multiprocessing/__init__.py
index e6e16c8..1f3e67c 100644
--- a/Lib/multiprocessing/__init__.py
+++ b/Lib/multiprocessing/__init__.py
@@ -13,32 +13,7 @@
#
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
__version__ = '0.70a1'
@@ -48,8 +23,8 @@ __all__ = [
'Manager', 'Pipe', 'cpu_count', 'log_to_stderr', 'get_logger',
'allow_connection_pickling', 'BufferTooShort', 'TimeoutError',
'Lock', 'RLock', 'Semaphore', 'BoundedSemaphore', 'Condition',
- 'Event', 'Queue', 'JoinableQueue', 'Pool', 'Value', 'Array',
- 'RawValue', 'RawArray', 'SUBDEBUG', 'SUBWARNING',
+ 'Event', 'Barrier', 'Queue', 'SimpleQueue', 'JoinableQueue', 'Pool',
+ 'Value', 'Array', 'RawValue', 'RawArray', 'SUBDEBUG', 'SUBWARNING',
]
__author__ = 'R. Oudkerk (r.m.oudkerk@gmail.com)'
@@ -161,7 +136,9 @@ def allow_connection_pickling():
'''
Install support for sending connections and sockets between processes
'''
- from multiprocessing import reduction
+ # This is undocumented. In previous versions of multiprocessing
+ # its only effect was to make socket objects inheritable on Windows.
+ import multiprocessing.connection
#
# Definitions depending on native semaphores
@@ -209,6 +186,13 @@ def Event():
from multiprocessing.synchronize import Event
return Event()
+def Barrier(parties, action=None, timeout=None):
+ '''
+ Returns a barrier object
+ '''
+ from multiprocessing.synchronize import Barrier
+ return Barrier(parties, action, timeout)
+
def Queue(maxsize=0):
'''
Returns a queue object
@@ -223,6 +207,13 @@ def JoinableQueue(maxsize=0):
from multiprocessing.queues import JoinableQueue
return JoinableQueue(maxsize)
+def SimpleQueue():
+ '''
+ Returns a queue object
+ '''
+ from multiprocessing.queues import SimpleQueue
+ return SimpleQueue()
+
def Pool(processes=None, initializer=None, initargs=(), maxtasksperchild=None):
'''
Returns a process pool object
@@ -244,19 +235,19 @@ def RawArray(typecode_or_type, size_or_initializer):
from multiprocessing.sharedctypes import RawArray
return RawArray(typecode_or_type, size_or_initializer)
-def Value(typecode_or_type, *args, **kwds):
+def Value(typecode_or_type, *args, lock=True):
'''
Returns a synchronized shared object
'''
from multiprocessing.sharedctypes import Value
- return Value(typecode_or_type, *args, **kwds)
+ return Value(typecode_or_type, *args, lock=lock)
-def Array(typecode_or_type, size_or_initializer, **kwds):
+def Array(typecode_or_type, size_or_initializer, *, lock=True):
'''
Returns a synchronized shared array
'''
from multiprocessing.sharedctypes import Array
- return Array(typecode_or_type, size_or_initializer, **kwds)
+ return Array(typecode_or_type, size_or_initializer, lock=lock)
#
#
diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py
index 4fa6f70..fbbd5d9 100644
--- a/Lib/multiprocessing/connection.py
+++ b/Lib/multiprocessing/connection.py
@@ -4,49 +4,34 @@
# multiprocessing/connection.py
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
-__all__ = [ 'Client', 'Listener', 'Pipe' ]
+__all__ = [ 'Client', 'Listener', 'Pipe', 'wait' ]
+import io
import os
import sys
+import pickle
+import select
import socket
+import struct
import errno
import time
import tempfile
import itertools
import _multiprocessing
-from multiprocessing import current_process, AuthenticationError
+from multiprocessing import current_process, AuthenticationError, BufferTooShort
from multiprocessing.util import get_temp_dir, Finalize, sub_debug, debug
-from multiprocessing.forking import duplicate, close
-
+from multiprocessing.forking import ForkingPickler
+try:
+ import _winapi
+ from _winapi import WAIT_OBJECT_0, WAIT_TIMEOUT, INFINITE
+except ImportError:
+ if sys.platform == 'win32':
+ raise
+ _winapi = None
#
#
@@ -122,6 +107,309 @@ def address_type(address):
raise ValueError('address type of %r unrecognized' % address)
#
+# Connection classes
+#
+
+class _ConnectionBase:
+ _handle = None
+
+ def __init__(self, handle, readable=True, writable=True):
+ handle = handle.__index__()
+ if handle < 0:
+ raise ValueError("invalid handle")
+ if not readable and not writable:
+ raise ValueError(
+ "at least one of `readable` and `writable` must be True")
+ self._handle = handle
+ self._readable = readable
+ self._writable = writable
+
+ # XXX should we use util.Finalize instead of a __del__?
+
+ def __del__(self):
+ if self._handle is not None:
+ self._close()
+
+ def _check_closed(self):
+ if self._handle is None:
+ raise IOError("handle is closed")
+
+ def _check_readable(self):
+ if not self._readable:
+ raise IOError("connection is write-only")
+
+ def _check_writable(self):
+ if not self._writable:
+ raise IOError("connection is read-only")
+
+ def _bad_message_length(self):
+ if self._writable:
+ self._readable = False
+ else:
+ self.close()
+ raise IOError("bad message length")
+
+ @property
+ def closed(self):
+ """True if the connection is closed"""
+ return self._handle is None
+
+ @property
+ def readable(self):
+ """True if the connection is readable"""
+ return self._readable
+
+ @property
+ def writable(self):
+ """True if the connection is writable"""
+ return self._writable
+
+ def fileno(self):
+ """File descriptor or handle of the connection"""
+ self._check_closed()
+ return self._handle
+
+ def close(self):
+ """Close the connection"""
+ if self._handle is not None:
+ try:
+ self._close()
+ finally:
+ self._handle = None
+
+ def send_bytes(self, buf, offset=0, size=None):
+ """Send the bytes data from a bytes-like object"""
+ self._check_closed()
+ self._check_writable()
+ m = memoryview(buf)
+ # HACK for byte-indexing of non-bytewise buffers (e.g. array.array)
+ if m.itemsize > 1:
+ m = memoryview(bytes(m))
+ n = len(m)
+ if offset < 0:
+ raise ValueError("offset is negative")
+ if n < offset:
+ raise ValueError("buffer length < offset")
+ if size is None:
+ size = n - offset
+ elif size < 0:
+ raise ValueError("size is negative")
+ elif offset + size > n:
+ raise ValueError("buffer length < offset + size")
+ self._send_bytes(m[offset:offset + size])
+
+ def send(self, obj):
+ """Send a (picklable) object"""
+ self._check_closed()
+ self._check_writable()
+ buf = io.BytesIO()
+ ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(obj)
+ self._send_bytes(buf.getbuffer())
+
+ def recv_bytes(self, maxlength=None):
+ """
+ Receive bytes data as a bytes object.
+ """
+ self._check_closed()
+ self._check_readable()
+ if maxlength is not None and maxlength < 0:
+ raise ValueError("negative maxlength")
+ buf = self._recv_bytes(maxlength)
+ if buf is None:
+ self._bad_message_length()
+ return buf.getvalue()
+
+ def recv_bytes_into(self, buf, offset=0):
+ """
+ Receive bytes data into a writeable buffer-like object.
+ Return the number of bytes read.
+ """
+ self._check_closed()
+ self._check_readable()
+ with memoryview(buf) as m:
+ # Get bytesize of arbitrary buffer
+ itemsize = m.itemsize
+ bytesize = itemsize * len(m)
+ if offset < 0:
+ raise ValueError("negative offset")
+ elif offset > bytesize:
+ raise ValueError("offset too large")
+ result = self._recv_bytes()
+ size = result.tell()
+ if bytesize < offset + size:
+ raise BufferTooShort(result.getvalue())
+ # Message can fit in dest
+ result.seek(0)
+ result.readinto(m[offset // itemsize :
+ (offset + size) // itemsize])
+ return size
+
+ def recv(self):
+ """Receive a (picklable) object"""
+ self._check_closed()
+ self._check_readable()
+ buf = self._recv_bytes()
+ return pickle.loads(buf.getbuffer())
+
+ def poll(self, timeout=0.0):
+ """Whether there is any input available to be read"""
+ self._check_closed()
+ self._check_readable()
+ return self._poll(timeout)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, exc_tb):
+ self.close()
+
+
+if _winapi:
+
+ class PipeConnection(_ConnectionBase):
+ """
+ Connection class based on a Windows named pipe.
+ Overlapped I/O is used, so the handles must have been created
+ with FILE_FLAG_OVERLAPPED.
+ """
+ _got_empty_message = False
+
+ def _close(self, _CloseHandle=_winapi.CloseHandle):
+ _CloseHandle(self._handle)
+
+ def _send_bytes(self, buf):
+ ov, err = _winapi.WriteFile(self._handle, buf, overlapped=True)
+ try:
+ if err == _winapi.ERROR_IO_PENDING:
+ waitres = _winapi.WaitForMultipleObjects(
+ [ov.event], False, INFINITE)
+ assert waitres == WAIT_OBJECT_0
+ except:
+ ov.cancel()
+ raise
+ finally:
+ nwritten, err = ov.GetOverlappedResult(True)
+ assert err == 0
+ assert nwritten == len(buf)
+
+ def _recv_bytes(self, maxsize=None):
+ if self._got_empty_message:
+ self._got_empty_message = False
+ return io.BytesIO()
+ else:
+ bsize = 128 if maxsize is None else min(maxsize, 128)
+ try:
+ ov, err = _winapi.ReadFile(self._handle, bsize,
+ overlapped=True)
+ try:
+ if err == _winapi.ERROR_IO_PENDING:
+ waitres = _winapi.WaitForMultipleObjects(
+ [ov.event], False, INFINITE)
+ assert waitres == WAIT_OBJECT_0
+ except:
+ ov.cancel()
+ raise
+ finally:
+ nread, err = ov.GetOverlappedResult(True)
+ if err == 0:
+ f = io.BytesIO()
+ f.write(ov.getbuffer())
+ return f
+ elif err == _winapi.ERROR_MORE_DATA:
+ return self._get_more_data(ov, maxsize)
+ except IOError as e:
+ if e.winerror == _winapi.ERROR_BROKEN_PIPE:
+ raise EOFError
+ else:
+ raise
+ raise RuntimeError("shouldn't get here; expected KeyboardInterrupt")
+
+ def _poll(self, timeout):
+ if (self._got_empty_message or
+ _winapi.PeekNamedPipe(self._handle)[0] != 0):
+ return True
+ return bool(wait([self], timeout))
+
+ def _get_more_data(self, ov, maxsize):
+ buf = ov.getbuffer()
+ f = io.BytesIO()
+ f.write(buf)
+ left = _winapi.PeekNamedPipe(self._handle)[1]
+ assert left > 0
+ if maxsize is not None and len(buf) + left > maxsize:
+ self._bad_message_length()
+ ov, err = _winapi.ReadFile(self._handle, left, overlapped=True)
+ rbytes, err = ov.GetOverlappedResult(True)
+ assert err == 0
+ assert rbytes == left
+ f.write(ov.getbuffer())
+ return f
+
+
+class Connection(_ConnectionBase):
+ """
+ Connection class based on an arbitrary file descriptor (Unix only), or
+ a socket handle (Windows).
+ """
+
+ if _winapi:
+ def _close(self, _close=_multiprocessing.closesocket):
+ _close(self._handle)
+ _write = _multiprocessing.send
+ _read = _multiprocessing.recv
+ else:
+ def _close(self, _close=os.close):
+ _close(self._handle)
+ _write = os.write
+ _read = os.read
+
+ def _send(self, buf, write=_write):
+ remaining = len(buf)
+ while True:
+ n = write(self._handle, buf)
+ remaining -= n
+ if remaining == 0:
+ break
+ buf = buf[n:]
+
+ def _recv(self, size, read=_read):
+ buf = io.BytesIO()
+ handle = self._handle
+ remaining = size
+ while remaining > 0:
+ chunk = read(handle, remaining)
+ n = len(chunk)
+ if n == 0:
+ if remaining == size:
+ raise EOFError
+ else:
+ raise IOError("got end of file during message")
+ buf.write(chunk)
+ remaining -= n
+ return buf
+
+ def _send_bytes(self, buf):
+ # For wire compatibility with 3.2 and lower
+ n = len(buf)
+ self._send(struct.pack("!i", n))
+ # The condition is necessary to avoid "broken pipe" errors
+ # when sending a 0-length buffer if the other end closed the pipe.
+ if n > 0:
+ self._send(buf)
+
+ def _recv_bytes(self, maxsize=None):
+ buf = self._recv(4)
+ size, = struct.unpack("!i", buf.getvalue())
+ if maxsize is not None and size > maxsize:
+ return None
+ return self._recv(size)
+
+ def _poll(self, timeout):
+ r = wait([self._handle], timeout)
+ return bool(r)
+
+
+#
# Public functions
#
@@ -154,6 +442,8 @@ class Listener(object):
Returns a `Connection` object.
'''
+ if self._listener is None:
+ raise IOError('listener is closed')
c = self._listener.accept()
if self._authkey:
deliver_challenge(c, self._authkey)
@@ -164,11 +454,19 @@ class Listener(object):
'''
Close the bound socket or named pipe of `self`.
'''
- return self._listener.close()
+ if self._listener is not None:
+ self._listener.close()
+ self._listener = None
address = property(lambda self: self._listener._address)
last_accepted = property(lambda self: self._listener._last_accepted)
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, exc_tb):
+ self.close()
+
def Client(address, family=None, authkey=None):
'''
@@ -201,56 +499,52 @@ if sys.platform != 'win32':
s1, s2 = socket.socketpair()
s1.setblocking(True)
s2.setblocking(True)
- c1 = _multiprocessing.Connection(os.dup(s1.fileno()))
- c2 = _multiprocessing.Connection(os.dup(s2.fileno()))
- s1.close()
- s2.close()
+ c1 = Connection(s1.detach())
+ c2 = Connection(s2.detach())
else:
fd1, fd2 = os.pipe()
- c1 = _multiprocessing.Connection(fd1, writable=False)
- c2 = _multiprocessing.Connection(fd2, readable=False)
+ c1 = Connection(fd1, writable=False)
+ c2 = Connection(fd2, readable=False)
return c1, c2
else:
- from _multiprocessing import win32
-
def Pipe(duplex=True):
'''
Returns pair of connection objects at either end of a pipe
'''
address = arbitrary_address('AF_PIPE')
if duplex:
- openmode = win32.PIPE_ACCESS_DUPLEX
- access = win32.GENERIC_READ | win32.GENERIC_WRITE
+ openmode = _winapi.PIPE_ACCESS_DUPLEX
+ access = _winapi.GENERIC_READ | _winapi.GENERIC_WRITE
obsize, ibsize = BUFSIZE, BUFSIZE
else:
- openmode = win32.PIPE_ACCESS_INBOUND
- access = win32.GENERIC_WRITE
+ openmode = _winapi.PIPE_ACCESS_INBOUND
+ access = _winapi.GENERIC_WRITE
obsize, ibsize = 0, BUFSIZE
- h1 = win32.CreateNamedPipe(
- address, openmode,
- win32.PIPE_TYPE_MESSAGE | win32.PIPE_READMODE_MESSAGE |
- win32.PIPE_WAIT,
- 1, obsize, ibsize, win32.NMPWAIT_WAIT_FOREVER, win32.NULL
+ h1 = _winapi.CreateNamedPipe(
+ address, openmode | _winapi.FILE_FLAG_OVERLAPPED |
+ _winapi.FILE_FLAG_FIRST_PIPE_INSTANCE,
+ _winapi.PIPE_TYPE_MESSAGE | _winapi.PIPE_READMODE_MESSAGE |
+ _winapi.PIPE_WAIT,
+ 1, obsize, ibsize, _winapi.NMPWAIT_WAIT_FOREVER, _winapi.NULL
)
- h2 = win32.CreateFile(
- address, access, 0, win32.NULL, win32.OPEN_EXISTING, 0, win32.NULL
+ h2 = _winapi.CreateFile(
+ address, access, 0, _winapi.NULL, _winapi.OPEN_EXISTING,
+ _winapi.FILE_FLAG_OVERLAPPED, _winapi.NULL
)
- win32.SetNamedPipeHandleState(
- h2, win32.PIPE_READMODE_MESSAGE, None, None
+ _winapi.SetNamedPipeHandleState(
+ h2, _winapi.PIPE_READMODE_MESSAGE, None, None
)
- try:
- win32.ConnectNamedPipe(h1, win32.NULL)
- except WindowsError as e:
- if e.args[0] != win32.ERROR_PIPE_CONNECTED:
- raise
+ overlapped = _winapi.ConnectNamedPipe(h1, overlapped=True)
+ _, err = overlapped.GetOverlappedResult(True)
+ assert err == 0
- c1 = _multiprocessing.PipeConnection(h1, writable=duplex)
- c2 = _multiprocessing.PipeConnection(h2, readable=duplex)
+ c1 = PipeConnection(h1, writable=duplex)
+ c2 = PipeConnection(h2, readable=duplex)
return c1, c2
@@ -265,12 +559,15 @@ class SocketListener(object):
def __init__(self, address, family, backlog=1):
self._socket = socket.socket(getattr(socket, family))
try:
- self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ # SO_REUSEADDR has different semantics on Windows (issue #2550).
+ if os.name == 'posix':
+ self._socket.setsockopt(socket.SOL_SOCKET,
+ socket.SO_REUSEADDR, 1)
self._socket.setblocking(True)
self._socket.bind(address)
self._socket.listen(backlog)
self._address = self._socket.getsockname()
- except socket.error:
+ except OSError:
self._socket.close()
raise
self._family = family
@@ -286,10 +583,7 @@ class SocketListener(object):
def accept(self):
s, self._last_accepted = self._socket.accept()
s.setblocking(True)
- fd = duplicate(s.fileno())
- conn = _multiprocessing.Connection(fd)
- s.close()
- return conn
+ return Connection(s.detach())
def close(self):
self._socket.close()
@@ -304,24 +598,8 @@ def SocketClient(address):
family = address_type(address)
with socket.socket( getattr(socket, family) ) as s:
s.setblocking(True)
- t = _init_timeout()
-
- while 1:
- try:
- s.connect(address)
- except socket.error as e:
- if e.args[0] != errno.ECONNREFUSED or _check_timeout(t):
- debug('failed to connect to address %s', address)
- raise
- time.sleep(0.01)
- else:
- break
- else:
- raise
-
- fd = duplicate(s.fileno())
- conn = _multiprocessing.Connection(fd)
- return conn
+ s.connect(address)
+ return Connection(s.detach())
#
# Definitions for connections based on named pipes
@@ -335,48 +613,55 @@ if sys.platform == 'win32':
'''
def __init__(self, address, backlog=None):
self._address = address
- handle = win32.CreateNamedPipe(
- address, win32.PIPE_ACCESS_DUPLEX,
- win32.PIPE_TYPE_MESSAGE | win32.PIPE_READMODE_MESSAGE |
- win32.PIPE_WAIT,
- win32.PIPE_UNLIMITED_INSTANCES, BUFSIZE, BUFSIZE,
- win32.NMPWAIT_WAIT_FOREVER, win32.NULL
- )
- self._handle_queue = [handle]
- self._last_accepted = None
+ self._handle_queue = [self._new_handle(first=True)]
+ self._last_accepted = None
sub_debug('listener created with address=%r', self._address)
-
self.close = Finalize(
self, PipeListener._finalize_pipe_listener,
args=(self._handle_queue, self._address), exitpriority=0
)
- def accept(self):
- newhandle = win32.CreateNamedPipe(
- self._address, win32.PIPE_ACCESS_DUPLEX,
- win32.PIPE_TYPE_MESSAGE | win32.PIPE_READMODE_MESSAGE |
- win32.PIPE_WAIT,
- win32.PIPE_UNLIMITED_INSTANCES, BUFSIZE, BUFSIZE,
- win32.NMPWAIT_WAIT_FOREVER, win32.NULL
+ def _new_handle(self, first=False):
+ flags = _winapi.PIPE_ACCESS_DUPLEX | _winapi.FILE_FLAG_OVERLAPPED
+ if first:
+ flags |= _winapi.FILE_FLAG_FIRST_PIPE_INSTANCE
+ return _winapi.CreateNamedPipe(
+ self._address, flags,
+ _winapi.PIPE_TYPE_MESSAGE | _winapi.PIPE_READMODE_MESSAGE |
+ _winapi.PIPE_WAIT,
+ _winapi.PIPE_UNLIMITED_INSTANCES, BUFSIZE, BUFSIZE,
+ _winapi.NMPWAIT_WAIT_FOREVER, _winapi.NULL
)
- self._handle_queue.append(newhandle)
+
+ def accept(self):
+ self._handle_queue.append(self._new_handle())
handle = self._handle_queue.pop(0)
try:
- win32.ConnectNamedPipe(handle, win32.NULL)
- except WindowsError as e:
+ ov = _winapi.ConnectNamedPipe(handle, overlapped=True)
+ except OSError as e:
+ if e.winerror != _winapi.ERROR_NO_DATA:
+ raise
# ERROR_NO_DATA can occur if a client has already connected,
# written data and then disconnected -- see Issue 14725.
- if e.args[0] not in (win32.ERROR_PIPE_CONNECTED,
- win32.ERROR_NO_DATA):
+ else:
+ try:
+ res = _winapi.WaitForMultipleObjects(
+ [ov.event], False, INFINITE)
+ except:
+ ov.cancel()
+ _winapi.CloseHandle(handle)
raise
- return _multiprocessing.PipeConnection(handle)
+ finally:
+ _, err = ov.GetOverlappedResult(True)
+ assert err == 0
+ return PipeConnection(handle)
@staticmethod
def _finalize_pipe_listener(queue, address):
sub_debug('closing listener with address=%r', address)
for handle in queue:
- close(handle)
+ _winapi.CloseHandle(handle)
def PipeClient(address):
'''
@@ -385,24 +670,25 @@ if sys.platform == 'win32':
t = _init_timeout()
while 1:
try:
- win32.WaitNamedPipe(address, 1000)
- h = win32.CreateFile(
- address, win32.GENERIC_READ | win32.GENERIC_WRITE,
- 0, win32.NULL, win32.OPEN_EXISTING, 0, win32.NULL
+ _winapi.WaitNamedPipe(address, 1000)
+ h = _winapi.CreateFile(
+ address, _winapi.GENERIC_READ | _winapi.GENERIC_WRITE,
+ 0, _winapi.NULL, _winapi.OPEN_EXISTING,
+ _winapi.FILE_FLAG_OVERLAPPED, _winapi.NULL
)
except WindowsError as e:
- if e.args[0] not in (win32.ERROR_SEM_TIMEOUT,
- win32.ERROR_PIPE_BUSY) or _check_timeout(t):
+ if e.winerror not in (_winapi.ERROR_SEM_TIMEOUT,
+ _winapi.ERROR_PIPE_BUSY) or _check_timeout(t):
raise
else:
break
else:
raise
- win32.SetNamedPipeHandleState(
- h, win32.PIPE_READMODE_MESSAGE, None, None
+ _winapi.SetNamedPipeHandleState(
+ h, _winapi.PIPE_READMODE_MESSAGE, None, None
)
- return _multiprocessing.PipeConnection(h)
+ return PipeConnection(h)
#
# Authentication stuff
@@ -459,10 +745,10 @@ class ConnectionWrapper(object):
return self._loads(s)
def _xml_dumps(obj):
- return xmlrpclib.dumps((obj,), None, None, None, 1).encode('utf8')
+ return xmlrpclib.dumps((obj,), None, None, None, 1).encode('utf-8')
def _xml_loads(s):
- (obj,), method = xmlrpclib.loads(s.decode('utf8'))
+ (obj,), method = xmlrpclib.loads(s.decode('utf-8'))
return obj
class XmlListener(Listener):
@@ -476,3 +762,140 @@ def XmlClient(*args, **kwds):
global xmlrpclib
import xmlrpc.client as xmlrpclib
return ConnectionWrapper(Client(*args, **kwds), _xml_dumps, _xml_loads)
+
+#
+# Wait
+#
+
+if sys.platform == 'win32':
+
+ def _exhaustive_wait(handles, timeout):
+ # Return ALL handles which are currently signalled. (Only
+ # returning the first signalled might create starvation issues.)
+ L = list(handles)
+ ready = []
+ while L:
+ res = _winapi.WaitForMultipleObjects(L, False, timeout)
+ if res == WAIT_TIMEOUT:
+ break
+ elif WAIT_OBJECT_0 <= res < WAIT_OBJECT_0 + len(L):
+ res -= WAIT_OBJECT_0
+ elif WAIT_ABANDONED_0 <= res < WAIT_ABANDONED_0 + len(L):
+ res -= WAIT_ABANDONED_0
+ else:
+ raise RuntimeError('Should not get here')
+ ready.append(L[res])
+ L = L[res+1:]
+ timeout = 0
+ return ready
+
+ _ready_errors = {_winapi.ERROR_BROKEN_PIPE, _winapi.ERROR_NETNAME_DELETED}
+
+ def wait(object_list, timeout=None):
+ '''
+ Wait till an object in object_list is ready/readable.
+
+ Returns list of those objects in object_list which are ready/readable.
+ '''
+ if timeout is None:
+ timeout = INFINITE
+ elif timeout < 0:
+ timeout = 0
+ else:
+ timeout = int(timeout * 1000 + 0.5)
+
+ object_list = list(object_list)
+ waithandle_to_obj = {}
+ ov_list = []
+ ready_objects = set()
+ ready_handles = set()
+
+ try:
+ for o in object_list:
+ try:
+ fileno = getattr(o, 'fileno')
+ except AttributeError:
+ waithandle_to_obj[o.__index__()] = o
+ else:
+ # start an overlapped read of length zero
+ try:
+ ov, err = _winapi.ReadFile(fileno(), 0, True)
+ except OSError as e:
+ err = e.winerror
+ if err not in _ready_errors:
+ raise
+ if err == _winapi.ERROR_IO_PENDING:
+ ov_list.append(ov)
+ waithandle_to_obj[ov.event] = o
+ else:
+ # If o.fileno() is an overlapped pipe handle and
+ # err == 0 then there is a zero length message
+ # in the pipe, but it HAS NOT been consumed.
+ ready_objects.add(o)
+ timeout = 0
+
+ ready_handles = _exhaustive_wait(waithandle_to_obj.keys(), timeout)
+ finally:
+ # request that overlapped reads stop
+ for ov in ov_list:
+ ov.cancel()
+
+ # wait for all overlapped reads to stop
+ for ov in ov_list:
+ try:
+ _, err = ov.GetOverlappedResult(True)
+ except OSError as e:
+ err = e.winerror
+ if err not in _ready_errors:
+ raise
+ if err != _winapi.ERROR_OPERATION_ABORTED:
+ o = waithandle_to_obj[ov.event]
+ ready_objects.add(o)
+ if err == 0:
+ # If o.fileno() is an overlapped pipe handle then
+ # a zero length message HAS been consumed.
+ if hasattr(o, '_got_empty_message'):
+ o._got_empty_message = True
+
+ ready_objects.update(waithandle_to_obj[h] for h in ready_handles)
+ return [o for o in object_list if o in ready_objects]
+
+else:
+
+ def wait(object_list, timeout=None):
+ '''
+ Wait till an object in object_list is ready/readable.
+
+ Returns list of those objects in object_list which are ready/readable.
+ '''
+ if timeout is not None:
+ if timeout <= 0:
+ return select.select(object_list, [], [], 0)[0]
+ else:
+ deadline = time.time() + timeout
+ while True:
+ try:
+ return select.select(object_list, [], [], timeout)[0]
+ except OSError as e:
+ if e.errno != errno.EINTR:
+ raise
+ if timeout is not None:
+ timeout = deadline - time.time()
+
+#
+# Make connection and socket objects sharable if possible
+#
+
+if sys.platform == 'win32':
+ from . import reduction
+ ForkingPickler.register(socket.socket, reduction.reduce_socket)
+ ForkingPickler.register(Connection, reduction.reduce_connection)
+ ForkingPickler.register(PipeConnection, reduction.reduce_pipe_connection)
+else:
+ try:
+ from . import reduction
+ except ImportError:
+ pass
+ else:
+ ForkingPickler.register(socket.socket, reduction.reduce_socket)
+ ForkingPickler.register(Connection, reduction.reduce_connection)
diff --git a/Lib/multiprocessing/dummy/__init__.py b/Lib/multiprocessing/dummy/__init__.py
index 101c3cb..e31fc61 100644
--- a/Lib/multiprocessing/dummy/__init__.py
+++ b/Lib/multiprocessing/dummy/__init__.py
@@ -35,7 +35,7 @@
__all__ = [
'Process', 'current_process', 'active_children', 'freeze_support',
'Lock', 'RLock', 'Semaphore', 'BoundedSemaphore', 'Condition',
- 'Event', 'Queue', 'Manager', 'Pipe', 'Pool', 'JoinableQueue'
+ 'Event', 'Barrier', 'Queue', 'Manager', 'Pipe', 'Pool', 'JoinableQueue'
]
#
@@ -46,12 +46,10 @@ import threading
import sys
import weakref
import array
-import itertools
-from multiprocessing import TimeoutError, cpu_count
from multiprocessing.dummy.connection import Pipe
from threading import Lock, RLock, Semaphore, BoundedSemaphore
-from threading import Event
+from threading import Event, Condition, Barrier
from queue import Queue
#
@@ -85,17 +83,6 @@ class DummyProcess(threading.Thread):
#
#
-class Condition(threading._Condition):
- # XXX
- if sys.version_info < (3, 0):
- notify_all = threading._Condition.notify_all.__func__
- else:
- notify_all = threading._Condition.notify_all
-
-#
-#
-#
-
Process = DummyProcess
current_process = threading.current_thread
current_process()._children = weakref.WeakKeyDictionary()
diff --git a/Lib/multiprocessing/dummy/connection.py b/Lib/multiprocessing/dummy/connection.py
index af10579..874ec8e 100644
--- a/Lib/multiprocessing/dummy/connection.py
+++ b/Lib/multiprocessing/dummy/connection.py
@@ -53,6 +53,12 @@ class Listener(object):
address = property(lambda self: self._backlog_queue)
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, exc_tb):
+ self.close()
+
def Client(address):
_in, _out = Queue(), Queue()
@@ -85,3 +91,9 @@ class Connection(object):
def close(self):
pass
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, exc_tb):
+ self.close()
diff --git a/Lib/multiprocessing/forking.py b/Lib/multiprocessing/forking.py
index bc8ac44..af6580d 100644
--- a/Lib/multiprocessing/forking.py
+++ b/Lib/multiprocessing/forking.py
@@ -4,32 +4,7 @@
# multiprocessing/forking.py
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
import os
@@ -38,7 +13,7 @@ import signal
from multiprocessing import util, process
-__all__ = ['Popen', 'assert_spawning', 'exit', 'duplicate', 'close', 'ForkingPickler']
+__all__ = ['Popen', 'assert_spawning', 'duplicate', 'close', 'ForkingPickler']
#
# Check that the current thread is spawning a child process
@@ -55,18 +30,18 @@ def assert_spawning(self):
# Try making some callable types picklable
#
-from pickle import _Pickler as Pickler
+from pickle import Pickler
+from copyreg import dispatch_table
+
class ForkingPickler(Pickler):
- dispatch = Pickler.dispatch.copy()
+ _extra_reducers = {}
+ def __init__(self, *args):
+ Pickler.__init__(self, *args)
+ self.dispatch_table = dispatch_table.copy()
+ self.dispatch_table.update(self._extra_reducers)
@classmethod
def register(cls, type, reduce):
- def dispatcher(self, obj):
- rv = reduce(obj)
- if isinstance(rv, str):
- self.save_global(obj, rv)
- else:
- self.save_reduce(obj=obj, *rv)
- cls.dispatch[type] = dispatcher
+ cls._extra_reducers[type] = reduce
def _reduce_method(m):
if m.__self__ is None:
@@ -100,9 +75,6 @@ else:
#
if sys.platform != 'win32':
- import time
-
- exit = os._exit
duplicate = os.dup
close = os.close
@@ -118,14 +90,23 @@ if sys.platform != 'win32':
sys.stderr.flush()
self.returncode = None
+ r, w = os.pipe()
+ self.sentinel = r
+
self.pid = os.fork()
if self.pid == 0:
+ os.close(r)
if 'random' in sys.modules:
import random
random.seed()
code = process_obj._bootstrap()
os._exit(code)
+ # `w` will be closed when the child exits, at which point `r`
+ # will become ready for reading (using e.g. select()).
+ os.close(w)
+ util.Finalize(self, os.close, (r,))
+
def poll(self, flag=os.WNOHANG):
if self.returncode is None:
try:
@@ -143,26 +124,20 @@ if sys.platform != 'win32':
return self.returncode
def wait(self, timeout=None):
- if timeout is None:
- return self.poll(0)
- deadline = time.time() + timeout
- delay = 0.0005
- while 1:
- res = self.poll()
- if res is not None:
- break
- remaining = deadline - time.time()
- if remaining <= 0:
- break
- delay = min(delay * 2, remaining, 0.05)
- time.sleep(delay)
- return res
+ if self.returncode is None:
+ if timeout is not None:
+ from .connection import wait
+ if not wait([self.sentinel], timeout):
+ return None
+ # This shouldn't block if wait() returned successfully.
+ return self.poll(os.WNOHANG if timeout == 0.0 else 0)
+ return self.returncode
def terminate(self):
if self.returncode is None:
try:
os.kill(self.pid, signal.SIGTERM)
- except OSError as e:
+ except OSError:
if self.wait(timeout=0.1) is None:
raise
@@ -177,12 +152,9 @@ if sys.platform != 'win32':
else:
import _thread
import msvcrt
- import _subprocess
- import time
+ import _winapi
- from pickle import dump, load, HIGHEST_PROTOCOL
- from _multiprocessing import win32, Connection, PipeConnection
- from .util import Finalize
+ from pickle import load, HIGHEST_PROTOCOL
def dump(obj, file, protocol=None):
ForkingPickler(file, protocol).dump(obj)
@@ -195,8 +167,7 @@ else:
WINEXE = (sys.platform == 'win32' and getattr(sys, 'frozen', False))
WINSERVICE = sys.executable.lower().endswith("pythonservice.exe")
- exit = win32.ExitProcess
- close = win32.CloseHandle
+ close = _winapi.CloseHandle
#
# _python_exe is the assumed path to the python executable.
@@ -218,11 +189,11 @@ else:
def duplicate(handle, target_process=None, inheritable=False):
if target_process is None:
- target_process = _subprocess.GetCurrentProcess()
- return _subprocess.DuplicateHandle(
- _subprocess.GetCurrentProcess(), handle, target_process,
- 0, inheritable, _subprocess.DUPLICATE_SAME_ACCESS
- ).Detach()
+ target_process = _winapi.GetCurrentProcess()
+ return _winapi.DuplicateHandle(
+ _winapi.GetCurrentProcess(), handle, target_process,
+ 0, inheritable, _winapi.DUPLICATE_SAME_ACCESS
+ )
#
# We define a Popen class similar to the one from subprocess, but
@@ -236,6 +207,9 @@ else:
_tls = _thread._local()
def __init__(self, process_obj):
+ cmd = ' '.join('"%s"' % x for x in get_command_line())
+ prep_data = get_preparation_data(process_obj._name)
+
# create pipe for communication with child
rfd, wfd = os.pipe()
@@ -243,30 +217,30 @@ else:
rhandle = duplicate(msvcrt.get_osfhandle(rfd), inheritable=True)
os.close(rfd)
- # start process
- cmd = get_command_line() + [rhandle]
- cmd = ' '.join('"%s"' % x for x in cmd)
- hp, ht, pid, tid = _subprocess.CreateProcess(
- _python_exe, cmd, None, None, 1, 0, None, None, None
- )
- ht.Close()
- close(rhandle)
-
- # set attributes of self
- self.pid = pid
- self.returncode = None
- self._handle = hp
-
- # send information to child
- prep_data = get_preparation_data(process_obj._name)
- to_child = os.fdopen(wfd, 'wb')
- Popen._tls.process_handle = int(hp)
- try:
- dump(prep_data, to_child, HIGHEST_PROTOCOL)
- dump(process_obj, to_child, HIGHEST_PROTOCOL)
- finally:
- del Popen._tls.process_handle
- to_child.close()
+ with open(wfd, 'wb', closefd=True) as to_child:
+ # start process
+ try:
+ hp, ht, pid, tid = _winapi.CreateProcess(
+ _python_exe, cmd + (' %s' % rhandle),
+ None, None, 1, 0, None, None, None
+ )
+ _winapi.CloseHandle(ht)
+ finally:
+ close(rhandle)
+
+ # set attributes of self
+ self.pid = pid
+ self.returncode = None
+ self._handle = hp
+ self.sentinel = int(hp)
+
+ # send information to child
+ Popen._tls.process_handle = int(hp)
+ try:
+ dump(prep_data, to_child, HIGHEST_PROTOCOL)
+ dump(process_obj, to_child, HIGHEST_PROTOCOL)
+ finally:
+ del Popen._tls.process_handle
@staticmethod
def thread_is_spawning():
@@ -279,13 +253,13 @@ else:
def wait(self, timeout=None):
if self.returncode is None:
if timeout is None:
- msecs = _subprocess.INFINITE
+ msecs = _winapi.INFINITE
else:
msecs = max(0, int(timeout * 1000 + 0.5))
- res = _subprocess.WaitForSingleObject(int(self._handle), msecs)
- if res == _subprocess.WAIT_OBJECT_0:
- code = _subprocess.GetExitCodeProcess(self._handle)
+ res = _winapi.WaitForSingleObject(int(self._handle), msecs)
+ if res == _winapi.WAIT_OBJECT_0:
+ code = _winapi.GetExitCodeProcess(self._handle)
if code == TERMINATE:
code = -signal.SIGTERM
self.returncode = code
@@ -298,9 +272,9 @@ else:
def terminate(self):
if self.returncode is None:
try:
- _subprocess.TerminateProcess(int(self._handle), TERMINATE)
- except WindowsError:
- if self.wait(timeout=0.1) is None:
+ _winapi.TerminateProcess(int(self._handle), TERMINATE)
+ except OSError:
+ if self.wait(timeout=1.0) is None:
raise
#
@@ -350,7 +324,8 @@ else:
return [sys.executable, '--multiprocessing-fork']
else:
prog = 'from multiprocessing.forking import main; main()'
- return [_python_exe, '-c', prog, '--multiprocessing-fork']
+ opts = util._args_from_interpreter_flags()
+ return [_python_exe] + opts + ['-c', prog, '--multiprocessing-fork']
def main():
@@ -372,7 +347,7 @@ else:
from_parent.close()
exitcode = self._bootstrap()
- exit(exitcode)
+ sys.exit(exitcode)
def get_preparation_data(name):
@@ -405,22 +380,6 @@ else:
return d
- #
- # Make (Pipe)Connection picklable
- #
-
- def reduce_connection(conn):
- if not Popen.thread_is_spawning():
- raise RuntimeError(
- 'By default %s objects can only be shared between processes\n'
- 'using inheritance' % type(conn).__name__
- )
- return type(conn), (Popen.duplicate_for_child(conn.fileno()),
- conn.readable, conn.writable)
-
- ForkingPickler.register(Connection, reduce_connection)
- ForkingPickler.register(PipeConnection, reduce_connection)
-
#
# Prepare current process
#
diff --git a/Lib/multiprocessing/heap.py b/Lib/multiprocessing/heap.py
index 0a25ef0..4e93c12 100644
--- a/Lib/multiprocessing/heap.py
+++ b/Lib/multiprocessing/heap.py
@@ -4,37 +4,11 @@
# multiprocessing/heap.py
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
import bisect
import mmap
-import tempfile
import os
import sys
import threading
@@ -52,7 +26,7 @@ __all__ = ['BufferWrapper']
if sys.platform == 'win32':
- from _multiprocessing import win32
+ import _winapi
class Arena(object):
@@ -62,7 +36,7 @@ if sys.platform == 'win32':
self.size = size
self.name = 'pym-%d-%d' % (os.getpid(), next(Arena._counter))
self.buffer = mmap.mmap(-1, self.size, tagname=self.name)
- assert win32.GetLastError() == 0, 'tagname already in use'
+ assert _winapi.GetLastError() == 0, 'tagname already in use'
self._state = (self.size, self.name)
def __getstate__(self):
@@ -72,7 +46,7 @@ if sys.platform == 'win32':
def __setstate__(self, state):
self.size, self.name = self._state = state
self.buffer = mmap.mmap(-1, self.size, tagname=self.name)
- assert win32.GetLastError() == win32.ERROR_ALREADY_EXISTS
+ assert _winapi.GetLastError() == _winapi.ERROR_ALREADY_EXISTS
else:
@@ -231,7 +205,7 @@ class Heap(object):
self._lock.release()
#
-# Class representing a chunk of an mmap -- can be inherited
+# Class representing a chunk of an mmap -- can be inherited by child process
#
class BufferWrapper(object):
@@ -244,11 +218,6 @@ class BufferWrapper(object):
self._state = (block, size)
Finalize(self, BufferWrapper._heap.free, args=(block,))
- def get_address(self):
+ def create_memoryview(self):
(arena, start, stop), size = self._state
- address, length = _multiprocessing.address_of_buffer(arena.buffer)
- assert size <= length
- return address + start
-
- def get_size(self):
- return self._state[1]
+ return memoryview(arena.buffer)[start:start+size]
diff --git a/Lib/multiprocessing/managers.py b/Lib/multiprocessing/managers.py
index 5588ead..1ab147e 100644
--- a/Lib/multiprocessing/managers.py
+++ b/Lib/multiprocessing/managers.py
@@ -5,32 +5,7 @@
# multiprocessing/managers.py
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
__all__ = [ 'BaseManager', 'SyncManager', 'BaseProxy', 'Token' ]
@@ -39,19 +14,16 @@ __all__ = [ 'BaseManager', 'SyncManager', 'BaseProxy', 'Token' ]
# Imports
#
-import os
import sys
-import weakref
import threading
import array
import queue
from traceback import format_exc
-from pickle import PicklingError
from multiprocessing import Process, current_process, active_children, Pool, util, connection
from multiprocessing.process import AuthenticationString
-from multiprocessing.forking import exit, Popen, assert_spawning, ForkingPickler
-from multiprocessing.util import Finalize, info
+from multiprocessing.forking import Popen, ForkingPickler
+from time import time as _time
#
# Register some things for pickling
@@ -168,28 +140,38 @@ class Server(object):
self.id_to_obj = {'0': (None, ())}
self.id_to_refcount = {}
self.mutex = threading.RLock()
- self.stop = 0
def serve_forever(self):
'''
Run the server forever
'''
+ self.stop_event = threading.Event()
current_process()._manager_server = self
try:
+ accepter = threading.Thread(target=self.accepter)
+ accepter.daemon = True
+ accepter.start()
try:
- while 1:
- try:
- c = self.listener.accept()
- except (OSError, IOError):
- continue
- t = threading.Thread(target=self.handle_request, args=(c,))
- t.daemon = True
- t.start()
+ while not self.stop_event.is_set():
+ self.stop_event.wait(1)
except (KeyboardInterrupt, SystemExit):
pass
finally:
- self.stop = 999
- self.listener.close()
+ if sys.stdout != sys.__stdout__:
+ util.debug('resetting stdout, stderr')
+ sys.stdout = sys.__stdout__
+ sys.stderr = sys.__stderr__
+ sys.exit(0)
+
+ def accepter(self):
+ while True:
+ try:
+ c = self.listener.accept()
+ except (OSError, IOError):
+ continue
+ t = threading.Thread(target=self.handle_request, args=(c,))
+ t.daemon = True
+ t.start()
def handle_request(self, c):
'''
@@ -236,7 +218,7 @@ class Server(object):
send = conn.send
id_to_obj = self.id_to_obj
- while not self.stop:
+ while not self.stop_event.is_set():
try:
methodname = obj = None
@@ -346,32 +328,13 @@ class Server(object):
Shutdown this process
'''
try:
- try:
- util.debug('manager received shutdown message')
- c.send(('#RETURN', None))
-
- if sys.stdout != sys.__stdout__:
- util.debug('resetting stdout, stderr')
- sys.stdout = sys.__stdout__
- sys.stderr = sys.__stderr__
-
- util._run_finalizers(0)
-
- for p in active_children():
- util.debug('terminating a child process of manager')
- p.terminate()
-
- for p in active_children():
- util.debug('terminating a child process of manager')
- p.join()
-
- util._run_finalizers()
- util.info('manager exiting with exitcode 0')
- except:
- import traceback
- traceback.print_exc()
+ util.debug('manager received shutdown message')
+ c.send(('#RETURN', None))
+ except:
+ import traceback
+ traceback.print_exc()
finally:
- exit(0)
+ self.stop_event.set()
def create(self, c, typeid, *args, **kwds):
'''
@@ -483,10 +446,6 @@ class BaseManager(object):
self._serializer = serializer
self._Listener, self._Client = listener_client[serializer]
- def __reduce__(self):
- return type(self).from_address, \
- (self._address, self._authkey, self._serializer)
-
def get_server(self):
'''
Return server object with serve_forever() method and address attribute
@@ -576,7 +535,10 @@ class BaseManager(object):
'''
Join the manager process (if it has been spawned)
'''
- self._process.join(timeout)
+ if self._process is not None:
+ self._process.join(timeout)
+ if not self._process.is_alive():
+ self._process = None
def _debug_info(self):
'''
@@ -599,6 +561,9 @@ class BaseManager(object):
conn.close()
def __enter__(self):
+ if self._state.value == State.INITIAL:
+ self.start()
+ assert self._state.value == State.STARTED
return self
def __exit__(self, exc_type, exc_val, exc_tb):
@@ -620,7 +585,7 @@ class BaseManager(object):
except Exception:
pass
- process.join(timeout=0.2)
+ process.join(timeout=1.0)
if process.is_alive():
util.info('manager still alive')
if hasattr(process, 'terminate'):
@@ -982,8 +947,9 @@ class IteratorProxy(BaseProxy):
class AcquirerProxy(BaseProxy):
_exposed_ = ('acquire', 'release')
- def acquire(self, blocking=True):
- return self._callmethod('acquire', (blocking,))
+ def acquire(self, blocking=True, timeout=None):
+ args = (blocking,) if timeout is None else (blocking, timeout)
+ return self._callmethod('acquire', args)
def release(self):
return self._callmethod('release')
def __enter__(self):
@@ -1000,6 +966,24 @@ class ConditionProxy(AcquirerProxy):
return self._callmethod('notify')
def notify_all(self):
return self._callmethod('notify_all')
+ def wait_for(self, predicate, timeout=None):
+ result = predicate()
+ if result:
+ return result
+ if timeout is not None:
+ endtime = _time() + timeout
+ else:
+ endtime = None
+ waittime = None
+ while not result:
+ if endtime is not None:
+ waittime = endtime - _time()
+ if waittime <= 0:
+ break
+ self.wait(waittime)
+ result = predicate()
+ return result
+
class EventProxy(BaseProxy):
_exposed_ = ('is_set', 'set', 'clear', 'wait')
@@ -1012,6 +996,26 @@ class EventProxy(BaseProxy):
def wait(self, timeout=None):
return self._callmethod('wait', (timeout,))
+
+class BarrierProxy(BaseProxy):
+ _exposed_ = ('__getattribute__', 'wait', 'abort', 'reset')
+ def wait(self, timeout=None):
+ return self._callmethod('wait', (timeout,))
+ def abort(self):
+ return self._callmethod('abort')
+ def reset(self):
+ return self._callmethod('reset')
+ @property
+ def parties(self):
+ return self._callmethod('__getattribute__', ('parties',))
+ @property
+ def n_waiting(self):
+ return self._callmethod('__getattribute__', ('n_waiting',))
+ @property
+ def broken(self):
+ return self._callmethod('__getattribute__', ('broken',))
+
+
class NamespaceProxy(BaseProxy):
_exposed_ = ('__getattribute__', '__setattr__', '__delattr__')
def __getattr__(self, key):
@@ -1041,12 +1045,11 @@ class ValueProxy(BaseProxy):
BaseListProxy = MakeProxyType('BaseListProxy', (
- '__add__', '__contains__', '__delitem__', '__delslice__',
- '__getitem__', '__getslice__', '__len__', '__mul__',
- '__reversed__', '__rmul__', '__setitem__', '__setslice__',
+ '__add__', '__contains__', '__delitem__', '__getitem__', '__len__',
+ '__mul__', '__reversed__', '__rmul__', '__setitem__',
'append', 'count', 'extend', 'index', 'insert', 'pop', 'remove',
'reverse', 'sort', '__imul__'
- )) # XXX __getslice__ and __setslice__ unneeded in Py3.0
+ ))
class ListProxy(BaseListProxy):
def __iadd__(self, value):
self._callmethod('extend', (value,))
@@ -1064,17 +1067,18 @@ DictProxy = MakeProxyType('DictProxy', (
ArrayProxy = MakeProxyType('ArrayProxy', (
- '__len__', '__getitem__', '__setitem__', '__getslice__', '__setslice__'
- )) # XXX __getslice__ and __setslice__ unneeded in Py3.0
+ '__len__', '__getitem__', '__setitem__'
+ ))
PoolProxy = MakeProxyType('PoolProxy', (
'apply', 'apply_async', 'close', 'imap', 'imap_unordered', 'join',
- 'map', 'map_async', 'terminate'
+ 'map', 'map_async', 'starmap', 'starmap_async', 'terminate'
))
PoolProxy._method_to_typeid_ = {
'apply_async': 'AsyncResult',
'map_async': 'AsyncResult',
+ 'starmap_async': 'AsyncResult',
'imap': 'Iterator',
'imap_unordered': 'Iterator'
}
@@ -1103,6 +1107,7 @@ SyncManager.register('Semaphore', threading.Semaphore, AcquirerProxy)
SyncManager.register('BoundedSemaphore', threading.BoundedSemaphore,
AcquirerProxy)
SyncManager.register('Condition', threading.Condition, ConditionProxy)
+SyncManager.register('Barrier', threading.Barrier, BarrierProxy)
SyncManager.register('Pool', Pool, PoolProxy)
SyncManager.register('list', list, ListProxy)
SyncManager.register('dict', dict, DictProxy)
diff --git a/Lib/multiprocessing/pool.py b/Lib/multiprocessing/pool.py
index 7502ff8..ec57939 100644
--- a/Lib/multiprocessing/pool.py
+++ b/Lib/multiprocessing/pool.py
@@ -4,32 +4,7 @@
# multiprocessing/pool.py
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
__all__ = ['Pool']
@@ -64,6 +39,9 @@ job_counter = itertools.count()
def mapstar(args):
return list(map(*args))
+def starmapstar(args):
+ return list(itertools.starmap(args[0], args[1]))
+
#
# Code run by worker processes
#
@@ -247,14 +225,30 @@ class Pool(object):
Apply `func` to each element in `iterable`, collecting the results
in a list that is returned.
'''
- assert self._state == RUN
- return self.map_async(func, iterable, chunksize).get()
+ return self._map_async(func, iterable, mapstar, chunksize).get()
+
+ def starmap(self, func, iterable, chunksize=None):
+ '''
+ Like `map()` method but the elements of the `iterable` are expected to
+ be iterables as well and will be unpacked as arguments. Hence
+ `func` and (a, b) becomes func(a, b).
+ '''
+ return self._map_async(func, iterable, starmapstar, chunksize).get()
+
+ def starmap_async(self, func, iterable, chunksize=None, callback=None,
+ error_callback=None):
+ '''
+ Asynchronous version of `starmap()` method.
+ '''
+ return self._map_async(func, iterable, starmapstar, chunksize,
+ callback, error_callback)
def imap(self, func, iterable, chunksize=1):
'''
Equivalent of `map()` -- can be MUCH slower than `Pool.map()`.
'''
- assert self._state == RUN
+ if self._state != RUN:
+ raise ValueError("Pool not running")
if chunksize == 1:
result = IMapIterator(self._cache)
self._taskqueue.put((((result._job, i, func, (x,), {})
@@ -272,7 +266,8 @@ class Pool(object):
'''
Like `imap()` method but ordering of results is arbitrary.
'''
- assert self._state == RUN
+ if self._state != RUN:
+ raise ValueError("Pool not running")
if chunksize == 1:
result = IMapUnorderedIterator(self._cache)
self._taskqueue.put((((result._job, i, func, (x,), {})
@@ -291,7 +286,8 @@ class Pool(object):
'''
Asynchronous version of `apply()` method.
'''
- assert self._state == RUN
+ if self._state != RUN:
+ raise ValueError("Pool not running")
result = ApplyResult(self._cache, callback, error_callback)
self._taskqueue.put(([(result._job, None, func, args, kwds)], None))
return result
@@ -301,7 +297,15 @@ class Pool(object):
'''
Asynchronous version of `map()` method.
'''
- assert self._state == RUN
+ return self._map_async(func, iterable, mapstar, chunksize)
+
+ def _map_async(self, func, iterable, mapper, chunksize=None, callback=None,
+ error_callback=None):
+ '''
+ Helper function to implement map, starmap and their async counterparts.
+ '''
+ if self._state != RUN:
+ raise ValueError("Pool not running")
if not hasattr(iterable, '__len__'):
iterable = list(iterable)
@@ -315,7 +319,7 @@ class Pool(object):
task_batches = Pool._get_tasks(func, iterable, chunksize)
result = MapResult(self._cache, chunksize, len(iterable), callback,
error_callback=error_callback)
- self._taskqueue.put((((result._job, i, mapstar, (x,), {})
+ self._taskqueue.put((((result._job, i, mapper, (x,), {})
for i, x in enumerate(task_batches)), None))
return result
@@ -519,6 +523,12 @@ class Pool(object):
debug('cleaning up worker %d' % p.pid)
p.join()
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ self.terminate()
+
#
# Class whose instances are returned by `Pool.apply_async()`
#
@@ -526,32 +536,26 @@ class Pool(object):
class ApplyResult(object):
def __init__(self, cache, callback, error_callback):
- self._cond = threading.Condition(threading.Lock())
+ self._event = threading.Event()
self._job = next(job_counter)
self._cache = cache
- self._ready = False
self._callback = callback
self._error_callback = error_callback
cache[self._job] = self
def ready(self):
- return self._ready
+ return self._event.is_set()
def successful(self):
- assert self._ready
+ assert self.ready()
return self._success
def wait(self, timeout=None):
- self._cond.acquire()
- try:
- if not self._ready:
- self._cond.wait(timeout)
- finally:
- self._cond.release()
+ self._event.wait(timeout)
def get(self, timeout=None):
self.wait(timeout)
- if not self._ready:
+ if not self.ready():
raise TimeoutError
if self._success:
return self._value
@@ -564,12 +568,7 @@ class ApplyResult(object):
self._callback(self._value)
if self._error_callback and not self._success:
self._error_callback(self._value)
- self._cond.acquire()
- try:
- self._ready = True
- self._cond.notify()
- finally:
- self._cond.release()
+ self._event.set()
del self._cache[self._job]
#
@@ -586,7 +585,7 @@ class MapResult(ApplyResult):
self._chunksize = chunksize
if chunksize <= 0:
self._number_left = 0
- self._ready = True
+ self._event.set()
del cache[self._job]
else:
self._number_left = length//chunksize + bool(length % chunksize)
@@ -600,24 +599,14 @@ class MapResult(ApplyResult):
if self._callback:
self._callback(self._value)
del self._cache[self._job]
- self._cond.acquire()
- try:
- self._ready = True
- self._cond.notify()
- finally:
- self._cond.release()
+ self._event.set()
else:
self._success = False
self._value = result
if self._error_callback:
self._error_callback(self._value)
del self._cache[self._job]
- self._cond.acquire()
- try:
- self._ready = True
- self._cond.notify()
- finally:
- self._cond.release()
+ self._event.set()
#
# Class whose instances are returned by `Pool.imap()`
diff --git a/Lib/multiprocessing/process.py b/Lib/multiprocessing/process.py
index 3262b50..893507b 100644
--- a/Lib/multiprocessing/process.py
+++ b/Lib/multiprocessing/process.py
@@ -4,32 +4,7 @@
# multiprocessing/process.py
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
__all__ = ['Process', 'current_process', 'active_children']
@@ -92,12 +67,16 @@ class Process(object):
'''
_Popen = None
- def __init__(self, group=None, target=None, name=None, args=(), kwargs={}):
+ def __init__(self, group=None, target=None, name=None, args=(), kwargs={},
+ *, daemon=None):
assert group is None, 'group argument must be None for now'
count = next(_current_process._counter)
self._identity = _current_process._identity + (count,)
self._authkey = _current_process._authkey
- self._daemonic = _current_process._daemonic
+ if daemon is not None:
+ self._daemonic = daemon
+ else:
+ self._daemonic = _current_process._daemonic
self._tempdir = _current_process._tempdir
self._parent_pid = os.getpid()
self._popen = None
@@ -130,6 +109,7 @@ class Process(object):
else:
from .forking import Popen
self._popen = Popen(self)
+ self._sentinel = self._popen.sentinel
_current_process._children.add(self)
def terminate(self):
@@ -216,6 +196,17 @@ class Process(object):
pid = ident
+ @property
+ def sentinel(self):
+ '''
+ Return a file descriptor (Unix) or handle (Windows) suitable for
+ waiting for process termination.
+ '''
+ try:
+ return self._sentinel
+ except AttributeError:
+ raise ValueError("process not started")
+
def __repr__(self):
if self is _current_process:
status = 'started'
diff --git a/Lib/multiprocessing/queues.py b/Lib/multiprocessing/queues.py
index 51d9912..37271fb 100644
--- a/Lib/multiprocessing/queues.py
+++ b/Lib/multiprocessing/queues.py
@@ -4,32 +4,7 @@
# multiprocessing/queues.py
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
__all__ = ['Queue', 'SimpleQueue', 'JoinableQueue']
@@ -39,12 +14,12 @@ import os
import threading
import collections
import time
-import atexit
import weakref
+import errno
from queue import Empty, Full
import _multiprocessing
-from multiprocessing import Pipe
+from multiprocessing.connection import Pipe
from multiprocessing.synchronize import Lock, BoundedSemaphore, Semaphore, Condition
from multiprocessing.util import debug, info, Finalize, register_after_fork
from multiprocessing.forking import assert_spawning
@@ -67,6 +42,8 @@ class Queue(object):
else:
self._wlock = Lock()
self._sem = BoundedSemaphore(maxsize)
+ # For use by concurrent.futures
+ self._ignore_epipe = False
self._after_fork()
@@ -75,11 +52,11 @@ class Queue(object):
def __getstate__(self):
assert_spawning(self)
- return (self._maxsize, self._reader, self._writer,
+ return (self._ignore_epipe, self._maxsize, self._reader, self._writer,
self._rlock, self._wlock, self._sem, self._opid)
def __setstate__(self, state):
- (self._maxsize, self._reader, self._writer,
+ (self._ignore_epipe, self._maxsize, self._reader, self._writer,
self._rlock, self._wlock, self._sem, self._opid) = state
self._after_fork()
@@ -182,7 +159,7 @@ class Queue(object):
self._thread = threading.Thread(
target=Queue._feed,
args=(self._buffer, self._notempty, self._send,
- self._wlock, self._writer.close),
+ self._wlock, self._writer.close, self._ignore_epipe),
name='QueueFeederThread'
)
self._thread.daemon = True
@@ -233,7 +210,7 @@ class Queue(object):
notempty.release()
@staticmethod
- def _feed(buffer, notempty, send, writelock, close):
+ def _feed(buffer, notempty, send, writelock, close, ignore_epipe):
debug('starting thread to feed data to pipe')
from .util import is_exiting
@@ -275,6 +252,8 @@ class Queue(object):
except IndexError:
pass
except Exception as e:
+ if ignore_epipe and getattr(e, 'errno', 0) == errno.EPIPE:
+ return
# Since this runs in a daemon thread the resources it uses
# may be become unusable while the process is cleaning up.
# We ignore errors which happen after the process has
@@ -356,6 +335,7 @@ class SimpleQueue(object):
def __init__(self):
self._reader, self._writer = Pipe(duplex=False)
self._rlock = Lock()
+ self._poll = self._reader.poll
if sys.platform == 'win32':
self._wlock = None
else:
@@ -363,7 +343,7 @@ class SimpleQueue(object):
self._make_methods()
def empty(self):
- return not self._reader.poll()
+ return not self._poll()
def __getstate__(self):
assert_spawning(self)
diff --git a/Lib/multiprocessing/reduction.py b/Lib/multiprocessing/reduction.py
index 6e5e5bc..656fa8f 100644
--- a/Lib/multiprocessing/reduction.py
+++ b/Lib/multiprocessing/reduction.py
@@ -5,53 +5,29 @@
# multiprocessing/reduction.py
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
-__all__ = []
+__all__ = ['reduce_socket', 'reduce_connection', 'send_handle', 'recv_handle']
import os
import sys
import socket
import threading
+import struct
+import signal
-import _multiprocessing
from multiprocessing import current_process
-from multiprocessing.forking import Popen, duplicate, close, ForkingPickler
from multiprocessing.util import register_after_fork, debug, sub_debug
-from multiprocessing.connection import Client, Listener
+from multiprocessing.util import is_exiting, sub_warning
#
#
#
-if not(sys.platform == 'win32' or hasattr(_multiprocessing, 'recvfd')):
+if not(sys.platform == 'win32' or (hasattr(socket, 'CMSG_LEN') and
+ hasattr(socket, 'SCM_RIGHTS'))):
raise ImportError('pickling of connections not supported')
#
@@ -59,157 +35,246 @@ if not(sys.platform == 'win32' or hasattr(_multiprocessing, 'recvfd')):
#
if sys.platform == 'win32':
- import _subprocess
- from _multiprocessing import win32
-
- def send_handle(conn, handle, destination_pid):
- process_handle = win32.OpenProcess(
- win32.PROCESS_ALL_ACCESS, False, destination_pid
- )
- try:
- new_handle = duplicate(handle, process_handle)
- conn.send(new_handle)
- finally:
- close(process_handle)
-
- def recv_handle(conn):
- return conn.recv()
+ # Windows
+ __all__ += ['reduce_pipe_connection']
+ import _winapi
-else:
def send_handle(conn, handle, destination_pid):
- _multiprocessing.sendfd(conn.fileno(), handle)
+ dh = DupHandle(handle, _winapi.DUPLICATE_SAME_ACCESS, destination_pid)
+ conn.send(dh)
def recv_handle(conn):
- return _multiprocessing.recvfd(conn.fileno())
-
-#
-# Support for a per-process server thread which caches pickled handles
-#
-
-_cache = set()
-
-def _reset(obj):
- global _lock, _listener, _cache
- for h in _cache:
- close(h)
- _cache.clear()
- _lock = threading.Lock()
- _listener = None
-
-_reset(None)
-register_after_fork(_reset, _reset)
-
-def _get_listener():
- global _listener
-
- if _listener is None:
- _lock.acquire()
- try:
- if _listener is None:
- debug('starting listener and thread for sending handles')
- _listener = Listener(authkey=current_process().authkey)
- t = threading.Thread(target=_serve)
- t.daemon = True
- t.start()
- finally:
- _lock.release()
-
- return _listener
-
-def _serve():
- from .util import is_exiting, sub_warning
-
- while 1:
- try:
- conn = _listener.accept()
- handle_wanted, destination_pid = conn.recv()
- _cache.remove(handle_wanted)
- send_handle(conn, handle_wanted, destination_pid)
- close(handle_wanted)
- conn.close()
- except:
- if not is_exiting():
- import traceback
- sub_warning(
- 'thread for sharing handles raised exception :\n' +
- '-'*79 + '\n' + traceback.format_exc() + '-'*79
- )
-
-#
-# Functions to be used for pickling/unpickling objects with handles
-#
-
-def reduce_handle(handle):
- if Popen.thread_is_spawning():
- return (None, Popen.duplicate_for_child(handle), True)
- dup_handle = duplicate(handle)
- _cache.add(dup_handle)
- sub_debug('reducing handle %d', handle)
- return (_get_listener().address, dup_handle, False)
-
-def rebuild_handle(pickled_data):
- address, handle, inherited = pickled_data
- if inherited:
- return handle
- sub_debug('rebuilding handle %d', handle)
- conn = Client(address, authkey=current_process().authkey)
- conn.send((handle, os.getpid()))
- new_handle = recv_handle(conn)
- conn.close()
- return new_handle
+ return conn.recv().detach()
+
+ class DupHandle(object):
+ def __init__(self, handle, access, pid=None):
+ # duplicate handle for process with given pid
+ if pid is None:
+ pid = os.getpid()
+ proc = _winapi.OpenProcess(_winapi.PROCESS_DUP_HANDLE, False, pid)
+ try:
+ self._handle = _winapi.DuplicateHandle(
+ _winapi.GetCurrentProcess(),
+ handle, proc, access, False, 0)
+ finally:
+ _winapi.CloseHandle(proc)
+ self._access = access
+ self._pid = pid
+
+ def detach(self):
+ # retrieve handle from process which currently owns it
+ if self._pid == os.getpid():
+ return self._handle
+ proc = _winapi.OpenProcess(_winapi.PROCESS_DUP_HANDLE, False,
+ self._pid)
+ try:
+ return _winapi.DuplicateHandle(
+ proc, self._handle, _winapi.GetCurrentProcess(),
+ self._access, False, _winapi.DUPLICATE_CLOSE_SOURCE)
+ finally:
+ _winapi.CloseHandle(proc)
+
+ class DupSocket(object):
+ def __init__(self, sock):
+ new_sock = sock.dup()
+ def send(conn, pid):
+ share = new_sock.share(pid)
+ conn.send_bytes(share)
+ self._id = resource_sharer.register(send, new_sock.close)
+
+ def detach(self):
+ conn = resource_sharer.get_connection(self._id)
+ try:
+ share = conn.recv_bytes()
+ return socket.fromshare(share)
+ finally:
+ conn.close()
+
+ def reduce_socket(s):
+ return rebuild_socket, (DupSocket(s),)
+
+ def rebuild_socket(ds):
+ return ds.detach()
+
+ def reduce_connection(conn):
+ handle = conn.fileno()
+ with socket.fromfd(handle, socket.AF_INET, socket.SOCK_STREAM) as s:
+ ds = DupSocket(s)
+ return rebuild_connection, (ds, conn.readable, conn.writable)
+
+ def rebuild_connection(ds, readable, writable):
+ from .connection import Connection
+ sock = ds.detach()
+ return Connection(sock.detach(), readable, writable)
-#
-# Register `_multiprocessing.Connection` with `ForkingPickler`
-#
-
-def reduce_connection(conn):
- rh = reduce_handle(conn.fileno())
- return rebuild_connection, (rh, conn.readable, conn.writable)
-
-def rebuild_connection(reduced_handle, readable, writable):
- handle = rebuild_handle(reduced_handle)
- return _multiprocessing.Connection(
- handle, readable=readable, writable=writable
- )
-
-ForkingPickler.register(_multiprocessing.Connection, reduce_connection)
+ def reduce_pipe_connection(conn):
+ access = ((_winapi.FILE_GENERIC_READ if conn.readable else 0) |
+ (_winapi.FILE_GENERIC_WRITE if conn.writable else 0))
+ dh = DupHandle(conn.fileno(), access)
+ return rebuild_pipe_connection, (dh, conn.readable, conn.writable)
-#
-# Register `socket.socket` with `ForkingPickler`
-#
+ def rebuild_pipe_connection(dh, readable, writable):
+ from .connection import PipeConnection
+ handle = dh.detach()
+ return PipeConnection(handle, readable, writable)
-def fromfd(fd, family, type_, proto=0):
- s = socket.fromfd(fd, family, type_, proto)
- if s.__class__ is not socket.socket:
- s = socket.socket(_sock=s)
- return s
+else:
+ # Unix
-def reduce_socket(s):
- reduced_handle = reduce_handle(s.fileno())
- return rebuild_socket, (reduced_handle, s.family, s.type, s.proto)
+ # On MacOSX we should acknowledge receipt of fds -- see Issue14669
+ ACKNOWLEDGE = sys.platform == 'darwin'
-def rebuild_socket(reduced_handle, family, type_, proto):
- fd = rebuild_handle(reduced_handle)
- _sock = fromfd(fd, family, type_, proto)
- close(fd)
- return _sock
+ def send_handle(conn, handle, destination_pid):
+ with socket.fromfd(conn.fileno(), socket.AF_UNIX, socket.SOCK_STREAM) as s:
+ s.sendmsg([b'x'], [(socket.SOL_SOCKET, socket.SCM_RIGHTS,
+ struct.pack("@i", handle))])
+ if ACKNOWLEDGE and conn.recv_bytes() != b'ACK':
+ raise RuntimeError('did not receive acknowledgement of fd')
-ForkingPickler.register(socket.socket, reduce_socket)
+ def recv_handle(conn):
+ size = struct.calcsize("@i")
+ with socket.fromfd(conn.fileno(), socket.AF_UNIX, socket.SOCK_STREAM) as s:
+ msg, ancdata, flags, addr = s.recvmsg(1, socket.CMSG_LEN(size))
+ try:
+ if ACKNOWLEDGE:
+ conn.send_bytes(b'ACK')
+ cmsg_level, cmsg_type, cmsg_data = ancdata[0]
+ if (cmsg_level == socket.SOL_SOCKET and
+ cmsg_type == socket.SCM_RIGHTS):
+ return struct.unpack("@i", cmsg_data[:size])[0]
+ except (ValueError, IndexError, struct.error):
+ pass
+ raise RuntimeError('Invalid data received')
+
+ class DupFd(object):
+ def __init__(self, fd):
+ new_fd = os.dup(fd)
+ def send(conn, pid):
+ send_handle(conn, new_fd, pid)
+ def close():
+ os.close(new_fd)
+ self._id = resource_sharer.register(send, close)
+
+ def detach(self):
+ conn = resource_sharer.get_connection(self._id)
+ try:
+ return recv_handle(conn)
+ finally:
+ conn.close()
+
+ def reduce_socket(s):
+ df = DupFd(s.fileno())
+ return rebuild_socket, (df, s.family, s.type, s.proto)
+
+ def rebuild_socket(df, family, type, proto):
+ fd = df.detach()
+ s = socket.fromfd(fd, family, type, proto)
+ os.close(fd)
+ return s
+
+ def reduce_connection(conn):
+ df = DupFd(conn.fileno())
+ return rebuild_connection, (df, conn.readable, conn.writable)
+
+ def rebuild_connection(df, readable, writable):
+ from .connection import Connection
+ fd = df.detach()
+ return Connection(fd, readable, writable)
#
-# Register `_multiprocessing.PipeConnection` with `ForkingPickler`
+# Server which shares registered resources with clients
#
-if sys.platform == 'win32':
-
- def reduce_pipe_connection(conn):
- rh = reduce_handle(conn.fileno())
- return rebuild_pipe_connection, (rh, conn.readable, conn.writable)
-
- def rebuild_pipe_connection(reduced_handle, readable, writable):
- handle = rebuild_handle(reduced_handle)
- return _multiprocessing.PipeConnection(
- handle, readable=readable, writable=writable
- )
-
- ForkingPickler.register(_multiprocessing.PipeConnection, reduce_pipe_connection)
+class ResourceSharer(object):
+ def __init__(self):
+ self._key = 0
+ self._cache = {}
+ self._old_locks = []
+ self._lock = threading.Lock()
+ self._listener = None
+ self._address = None
+ self._thread = None
+ register_after_fork(self, ResourceSharer._afterfork)
+
+ def register(self, send, close):
+ with self._lock:
+ if self._address is None:
+ self._start()
+ self._key += 1
+ self._cache[self._key] = (send, close)
+ return (self._address, self._key)
+
+ @staticmethod
+ def get_connection(ident):
+ from .connection import Client
+ address, key = ident
+ c = Client(address, authkey=current_process().authkey)
+ c.send((key, os.getpid()))
+ return c
+
+ def stop(self, timeout=None):
+ from .connection import Client
+ with self._lock:
+ if self._address is not None:
+ c = Client(self._address, authkey=current_process().authkey)
+ c.send(None)
+ c.close()
+ self._thread.join(timeout)
+ if self._thread.is_alive():
+ sub_warn('ResourceSharer thread did not stop when asked')
+ self._listener.close()
+ self._thread = None
+ self._address = None
+ self._listener = None
+ for key, (send, close) in self._cache.items():
+ close()
+ self._cache.clear()
+
+ def _afterfork(self):
+ for key, (send, close) in self._cache.items():
+ close()
+ self._cache.clear()
+ # If self._lock was locked at the time of the fork, it may be broken
+ # -- see issue 6721. Replace it without letting it be gc'ed.
+ self._old_locks.append(self._lock)
+ self._lock = threading.Lock()
+ if self._listener is not None:
+ self._listener.close()
+ self._listener = None
+ self._address = None
+ self._thread = None
+
+ def _start(self):
+ from .connection import Listener
+ assert self._listener is None
+ debug('starting listener and thread for sending handles')
+ self._listener = Listener(authkey=current_process().authkey)
+ self._address = self._listener.address
+ t = threading.Thread(target=self._serve)
+ t.daemon = True
+ t.start()
+ self._thread = t
+
+ def _serve(self):
+ if hasattr(signal, 'pthread_sigmask'):
+ signal.pthread_sigmask(signal.SIG_BLOCK, range(1, signal.NSIG))
+ while 1:
+ try:
+ conn = self._listener.accept()
+ msg = conn.recv()
+ if msg is None:
+ break
+ key, destination_pid = msg
+ send, close = self._cache.pop(key)
+ send(conn, destination_pid)
+ close()
+ conn.close()
+ except:
+ if not is_exiting():
+ import traceback
+ sub_warning(
+ 'thread for sharing handles raised exception :\n' +
+ '-'*79 + '\n' + traceback.format_exc() + '-'*79
+ )
+
+resource_sharer = ResourceSharer()
diff --git a/Lib/multiprocessing/sharedctypes.py b/Lib/multiprocessing/sharedctypes.py
index 1e694da..a358ed4 100644
--- a/Lib/multiprocessing/sharedctypes.py
+++ b/Lib/multiprocessing/sharedctypes.py
@@ -4,35 +4,9 @@
# multiprocessing/sharedctypes.py
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
-import sys
import ctypes
import weakref
@@ -89,7 +63,7 @@ def RawArray(typecode_or_type, size_or_initializer):
result.__init__(*size_or_initializer)
return result
-def Value(typecode_or_type, *args, lock=None):
+def Value(typecode_or_type, *args, lock=True):
'''
Return a synchronization wrapper for a Value
'''
@@ -102,13 +76,10 @@ def Value(typecode_or_type, *args, lock=None):
raise AttributeError("'%r' has no method 'acquire'" % lock)
return synchronized(obj, lock)
-def Array(typecode_or_type, size_or_initializer, **kwds):
+def Array(typecode_or_type, size_or_initializer, *, lock=True):
'''
Return a synchronization wrapper for a RawArray
'''
- lock = kwds.pop('lock', None)
- if kwds:
- raise ValueError('unrecognized keyword argument(s): %s' % list(kwds.keys()))
obj = RawArray(typecode_or_type, size_or_initializer)
if lock is False:
return obj
@@ -158,7 +129,8 @@ def rebuild_ctype(type_, wrapper, length):
if length is not None:
type_ = type_ * length
ForkingPickler.register(type_, reduce_ctype)
- obj = type_.from_address(wrapper.get_address())
+ buf = wrapper.create_memoryview()
+ obj = type_.from_buffer(buf)
obj._wrapper = wrapper
return obj
diff --git a/Lib/multiprocessing/synchronize.py b/Lib/multiprocessing/synchronize.py
index 70ae825..22eabe5 100644
--- a/Lib/multiprocessing/synchronize.py
+++ b/Lib/multiprocessing/synchronize.py
@@ -4,32 +4,7 @@
# multiprocessing/synchronize.py
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
__all__ = [
@@ -37,15 +12,13 @@ __all__ = [
]
import threading
-import os
import sys
-from time import time as _time, sleep as _sleep
-
import _multiprocessing
from multiprocessing.process import current_process
-from multiprocessing.util import Finalize, register_after_fork, debug
+from multiprocessing.util import register_after_fork, debug
from multiprocessing.forking import assert_spawning, Popen
+from time import time as _time
# Try to import the mp.synchronize module cleanly, if it fails
# raise ImportError for platforms lacking a working sem_open implementation.
@@ -243,7 +216,7 @@ class Condition(object):
try:
# wait for notification or timeout
- ret = self._wait_semaphore.acquire(True, timeout)
+ return self._wait_semaphore.acquire(True, timeout)
finally:
# indicate that this thread has woken
self._woken_count.release()
@@ -251,7 +224,6 @@ class Condition(object):
# reacquire lock
for i in range(count):
self._lock.acquire()
- return ret
def notify(self):
assert self._lock._semlock._is_mine(), 'lock is not owned'
@@ -293,6 +265,24 @@ class Condition(object):
while self._wait_semaphore.acquire(False):
pass
+ def wait_for(self, predicate, timeout=None):
+ result = predicate()
+ if result:
+ return result
+ if timeout is not None:
+ endtime = _time() + timeout
+ else:
+ endtime = None
+ waittime = None
+ while not result:
+ if endtime is not None:
+ waittime = endtime - _time()
+ if waittime <= 0:
+ break
+ self.wait(waittime)
+ result = predicate()
+ return result
+
#
# Event
#
@@ -343,3 +333,43 @@ class Event(object):
return False
finally:
self._cond.release()
+
+#
+# Barrier
+#
+
+class Barrier(threading.Barrier):
+
+ def __init__(self, parties, action=None, timeout=None):
+ import struct
+ from multiprocessing.heap import BufferWrapper
+ wrapper = BufferWrapper(struct.calcsize('i') * 2)
+ cond = Condition()
+ self.__setstate__((parties, action, timeout, cond, wrapper))
+ self._state = 0
+ self._count = 0
+
+ def __setstate__(self, state):
+ (self._parties, self._action, self._timeout,
+ self._cond, self._wrapper) = state
+ self._array = self._wrapper.create_memoryview().cast('i')
+
+ def __getstate__(self):
+ return (self._parties, self._action, self._timeout,
+ self._cond, self._wrapper)
+
+ @property
+ def _state(self):
+ return self._array[0]
+
+ @_state.setter
+ def _state(self, value):
+ self._array[0] = value
+
+ @property
+ def _count(self):
+ return self._array[1]
+
+ @_count.setter
+ def _count(self, value):
+ self._array[1] = value
diff --git a/Lib/multiprocessing/util.py b/Lib/multiprocessing/util.py
index 20bba37..7495813 100644
--- a/Lib/multiprocessing/util.py
+++ b/Lib/multiprocessing/util.py
@@ -4,39 +4,18 @@
# multiprocessing/util.py
#
# Copyright (c) 2006-2008, R Oudkerk
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of author nor the names of any contributors may be
-# used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# Licensed to PSF under a Contributor Agreement.
#
+import sys
+import functools
+import os
import itertools
import weakref
import atexit
import threading # we want threading to install it's
# cleanup function before multiprocessing does
+from subprocess import _args_from_interpreter_flags
from multiprocessing.process import current_process, active_children
@@ -84,7 +63,7 @@ def get_logger():
Returns logger used by multiprocessing
'''
global _logger
- import logging, atexit
+ import logging
logging._acquireLock()
try:
@@ -183,10 +162,15 @@ class Finalize(object):
self._args = args
self._kwargs = kwargs or {}
self._key = (exitpriority, next(_finalizer_counter))
+ self._pid = os.getpid()
_finalizer_registry[self._key] = self
- def __call__(self, wr=None):
+ def __call__(self, wr=None,
+ # Need to bind these locally because the globals can have
+ # been cleared at shutdown
+ _finalizer_registry=_finalizer_registry,
+ sub_debug=sub_debug, getpid=os.getpid):
'''
Run the callback unless it has already been called or cancelled
'''
@@ -195,9 +179,13 @@ class Finalize(object):
except KeyError:
sub_debug('finalizer no longer registered')
else:
- sub_debug('finalizer calling %s with args %s and kwargs %s',
- self._callback, self._args, self._kwargs)
- res = self._callback(*self._args, **self._kwargs)
+ if self._pid != getpid():
+ sub_debug('finalizer ignored because different process')
+ res = None
+ else:
+ sub_debug('finalizer calling %s with args %s and kwargs %s',
+ self._callback, self._args, self._kwargs)
+ res = self._callback(*self._args, **self._kwargs)
self._weakref = self._callback = self._args = \
self._kwargs = self._key = None
return res
@@ -299,16 +287,21 @@ def _exit_function(info=info, debug=debug, _run_finalizers=_run_finalizers,
info('process shutting down')
debug('running all "atexit" finalizers with priority >= 0')
_run_finalizers(0)
+
if current_process() is not None:
# We check if the current process is None here because if
- # it's None, any call to ``active_children()`` will throw an
- # AttributeError (active_children winds up trying to get
- # attributes from util._current_process). This happens in a
- # variety of shutdown circumstances that are not well-understood
- # because module-scope variables are not apparently supposed to
- # be destroyed until after this function is called. However,
- # they are indeed destroyed before this function is called. See
- # issues #9775 and #15881. Also related: #4106, #9205, and #9207.
+ # it's None, any call to ``active_children()`` will throw
+ # an AttributeError (active_children winds up trying to
+ # get attributes from util._current_process). One
+ # situation where this can happen is if someone has
+ # manipulated sys.modules, causing this module to be
+ # garbage collected. The destructor for the module type
+ # then replaces all values in the module dict with None.
+ # For instance, after setuptools runs a test it replaces
+ # sys.modules with a copy created earlier. See issues
+ # #9775 and #15881. Also related: #4106, #9205, and
+ # #9207.
+
for p in active_children():
if p._daemonic:
info('calling terminate() for daemon %s', p.name)
diff --git a/Lib/nntplib.py b/Lib/nntplib.py
index 32bffd8..2de6ebd 100644
--- a/Lib/nntplib.py
+++ b/Lib/nntplib.py
@@ -166,7 +166,7 @@ def decode_header(header_str):
parts.append(v.decode(enc or 'ascii'))
else:
parts.append(v)
- return ' '.join(parts)
+ return ''.join(parts)
def _parse_overview_fmt(lines):
"""Parse a list of string representing the response to LIST OVERVIEW.FMT
@@ -351,6 +351,20 @@ class _NNTPBase:
# Log in and encryption setup order is left to subclasses.
self.authenticated = False
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args):
+ is_connected = lambda: hasattr(self, "file")
+ if is_connected():
+ try:
+ self.quit()
+ except (socket.error, EOFError):
+ pass
+ finally:
+ if is_connected():
+ self._close()
+
def getwelcome(self):
"""Get the welcome message from the server
(this is read and squirreled away by __init__()).
@@ -819,7 +833,7 @@ class _NNTPBase:
- list: list of (name,title) strings"""
warnings.warn("The XGTITLE extension is not actively used, "
"use descriptions() instead",
- PendingDeprecationWarning, 2)
+ DeprecationWarning, 2)
line_pat = re.compile('^([^ \t]+)[ \t]+(.*)$')
resp, raw_lines = self._longcmdstring('XGTITLE ' + group, file)
lines = []
@@ -837,7 +851,7 @@ class _NNTPBase:
path: directory path to article
"""
warnings.warn("The XPATH extension is not actively used",
- PendingDeprecationWarning, 2)
+ DeprecationWarning, 2)
resp = self._shortcmd('XPATH {0}'.format(id))
if not resp.startswith('223'):
diff --git a/Lib/numbers.py b/Lib/numbers.py
index ecfad7c..b206457 100644
--- a/Lib/numbers.py
+++ b/Lib/numbers.py
@@ -5,7 +5,7 @@
TODO: Fill out more detailed documentation on the operators."""
-from abc import ABCMeta, abstractmethod, abstractproperty
+from abc import ABCMeta, abstractmethod
__all__ = ["Number", "Complex", "Real", "Rational", "Integral"]
@@ -50,7 +50,8 @@ class Complex(Number):
"""True if self != 0. Called for bool(self)."""
return self != 0
- @abstractproperty
+ @property
+ @abstractmethod
def real(self):
"""Retrieve the real component of this number.
@@ -58,7 +59,8 @@ class Complex(Number):
"""
raise NotImplementedError
- @abstractproperty
+ @property
+ @abstractmethod
def imag(self):
"""Retrieve the imaginary component of this number.
@@ -272,11 +274,13 @@ class Rational(Real):
__slots__ = ()
- @abstractproperty
+ @property
+ @abstractmethod
def numerator(self):
raise NotImplementedError
- @abstractproperty
+ @property
+ @abstractmethod
def denominator(self):
raise NotImplementedError
diff --git a/Lib/opcode.py b/Lib/opcode.py
index 8e15d13..a639fe3 100644
--- a/Lib/opcode.py
+++ b/Lib/opcode.py
@@ -6,7 +6,7 @@ operate on bytecodes (e.g. peephole optimizers).
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
- "HAVE_ARGUMENT", "EXTENDED_ARG"]
+ "HAVE_ARGUMENT", "EXTENDED_ARG", "hasnargs"]
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
'is not', 'exception match', 'BAD')
@@ -18,6 +18,7 @@ hasjabs = []
haslocal = []
hascompare = []
hasfree = []
+hasnargs = []
opmap = {}
opname = [''] * 256
@@ -43,7 +44,6 @@ def jabs_op(name, op):
# Instruction opcodes for compiled code
# Blank lines correspond to available opcodes
-def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
@@ -88,6 +88,7 @@ def_op('STORE_LOCALS', 69)
def_op('PRINT_EXPR', 70)
def_op('LOAD_BUILD_CLASS', 71)
+def_op('YIELD_FROM', 72)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
@@ -152,6 +153,7 @@ haslocal.append(126)
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
+hasnargs.append(131)
def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
def_op('MAKE_CLOSURE', 134)
@@ -165,8 +167,11 @@ def_op('DELETE_DEREF', 138)
hasfree.append(138)
def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
+hasnargs.append(140)
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
+hasnargs.append(141)
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
+hasnargs.append(142)
jrel_op('SETUP_WITH', 143)
diff --git a/Lib/optparse.py b/Lib/optparse.py
index d97a1f7..37764d3 100644
--- a/Lib/optparse.py
+++ b/Lib/optparse.py
@@ -86,10 +86,16 @@ def _repr(self):
# Id: errors.py 509 2006-04-20 00:58:24Z gward
try:
- from gettext import gettext
+ from gettext import gettext, ngettext
except ImportError:
def gettext(message):
return message
+
+ def ngettext(singular, plural, n):
+ if n == 1:
+ return singular
+ return plural
+
_ = gettext
@@ -411,11 +417,8 @@ def _parse_num(val, type):
def _parse_int(val):
return _parse_num(val, int)
-def _parse_long(val):
- return _parse_num(val, int)
-
_builtin_cvt = { "int" : (_parse_int, _("integer")),
- "long" : (_parse_long, _("long integer")),
+ "long" : (_parse_int, _("integer")),
"float" : (float, _("floating-point")),
"complex" : (complex, _("complex")) }
@@ -1483,11 +1486,10 @@ class OptionParser (OptionContainer):
if option.takes_value():
nargs = option.nargs
if len(rargs) < nargs:
- if nargs == 1:
- self.error(_("%s option requires an argument") % opt)
- else:
- self.error(_("%s option requires %d arguments")
- % (opt, nargs))
+ self.error(ngettext(
+ "%(option)s option requires %(number)d argument",
+ "%(option)s option requires %(number)d arguments",
+ nargs) % {"option": opt, "number": nargs})
elif nargs == 1:
value = rargs.pop(0)
else:
@@ -1522,11 +1524,10 @@ class OptionParser (OptionContainer):
nargs = option.nargs
if len(rargs) < nargs:
- if nargs == 1:
- self.error(_("%s option requires an argument") % opt)
- else:
- self.error(_("%s option requires %d arguments")
- % (opt, nargs))
+ self.error(ngettext(
+ "%(option)s option requires %(number)d argument",
+ "%(option)s option requires %(number)d arguments",
+ nargs) % {"option": opt, "number": nargs})
elif nargs == 1:
value = rargs.pop(0)
else:
diff --git a/Lib/os.py b/Lib/os.py
index d1101a2..84eeaeb 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -24,13 +24,18 @@ and opendir), and leave all pathname manipulation to os.path
#'
import sys, errno
+import stat as st
_names = sys.builtin_module_names
# Note: more names are added to __all__ later.
__all__ = ["altsep", "curdir", "pardir", "sep", "pathsep", "linesep",
- "defpath", "name", "path", "devnull",
- "SEEK_SET", "SEEK_CUR", "SEEK_END"]
+ "defpath", "name", "path", "devnull", "SEEK_SET", "SEEK_CUR",
+ "SEEK_END", "fsencode", "fsdecode", "get_exec_path", "fdopen",
+ "popen", "extsep"]
+
+def _exists(name):
+ return name in globals()
def _get_exports_list(module):
try:
@@ -38,19 +43,23 @@ def _get_exports_list(module):
except AttributeError:
return [n for n in dir(module) if n[0] != '_']
+# Any new dependencies of the os module and/or changes in path separator
+# requires updating importlib as well.
if 'posix' in _names:
name = 'posix'
linesep = '\n'
from posix import *
try:
from posix import _exit
+ __all__.append('_exit')
except ImportError:
pass
import posixpath as path
- import posix
- __all__.extend(_get_exports_list(posix))
- del posix
+ try:
+ from posix import _have_functions
+ except ImportError:
+ pass
elif 'nt' in _names:
name = 'nt'
@@ -58,6 +67,7 @@ elif 'nt' in _names:
from nt import *
try:
from nt import _exit
+ __all__.append('_exit')
except ImportError:
pass
import ntpath as path
@@ -66,12 +76,18 @@ elif 'nt' in _names:
__all__.extend(_get_exports_list(nt))
del nt
+ try:
+ from nt import _have_functions
+ except ImportError:
+ pass
+
elif 'os2' in _names:
name = 'os2'
linesep = '\r\n'
from os2 import *
try:
from os2 import _exit
+ __all__.append('_exit')
except ImportError:
pass
if sys.version.find('EMX GCC') == -1:
@@ -84,12 +100,18 @@ elif 'os2' in _names:
__all__.extend(_get_exports_list(os2))
del os2
+ try:
+ from os2 import _have_functions
+ except ImportError:
+ pass
+
elif 'ce' in _names:
name = 'ce'
linesep = '\r\n'
from ce import *
try:
from ce import _exit
+ __all__.append('_exit')
except ImportError:
pass
# We can use the standard Windows path.
@@ -99,6 +121,11 @@ elif 'ce' in _names:
__all__.extend(_get_exports_list(ce))
del ce
+ try:
+ from ce import _have_functions
+ except ImportError:
+ pass
+
else:
raise ImportError('no os specific module found')
@@ -108,8 +135,97 @@ from os.path import (curdir, pardir, sep, pathsep, defpath, extsep, altsep,
del _names
+
+if _exists("_have_functions"):
+ _globals = globals()
+ def _add(str, fn):
+ if (fn in _globals) and (str in _have_functions):
+ _set.add(_globals[fn])
+
+ _set = set()
+ _add("HAVE_FACCESSAT", "access")
+ _add("HAVE_FCHMODAT", "chmod")
+ _add("HAVE_FCHOWNAT", "chown")
+ _add("HAVE_FSTATAT", "stat")
+ _add("HAVE_FUTIMESAT", "utime")
+ _add("HAVE_LINKAT", "link")
+ _add("HAVE_MKDIRAT", "mkdir")
+ _add("HAVE_MKFIFOAT", "mkfifo")
+ _add("HAVE_MKNODAT", "mknod")
+ _add("HAVE_OPENAT", "open")
+ _add("HAVE_READLINKAT", "readlink")
+ _add("HAVE_RENAMEAT", "rename")
+ _add("HAVE_SYMLINKAT", "symlink")
+ _add("HAVE_UNLINKAT", "unlink")
+ _add("HAVE_UNLINKAT", "rmdir")
+ _add("HAVE_UTIMENSAT", "utime")
+ supports_dir_fd = _set
+
+ _set = set()
+ _add("HAVE_FACCESSAT", "access")
+ supports_effective_ids = _set
+
+ _set = set()
+ _add("HAVE_FCHDIR", "chdir")
+ _add("HAVE_FCHMOD", "chmod")
+ _add("HAVE_FCHOWN", "chown")
+ _add("HAVE_FDOPENDIR", "listdir")
+ _add("HAVE_FEXECVE", "execve")
+ _set.add(stat) # fstat always works
+ _add("HAVE_FTRUNCATE", "truncate")
+ _add("HAVE_FUTIMENS", "utime")
+ _add("HAVE_FUTIMES", "utime")
+ _add("HAVE_FPATHCONF", "pathconf")
+ if _exists("statvfs") and _exists("fstatvfs"): # mac os x10.3
+ _add("HAVE_FSTATVFS", "statvfs")
+ supports_fd = _set
+
+ _set = set()
+ _add("HAVE_FACCESSAT", "access")
+ # Some platforms don't support lchmod(). Often the function exists
+ # anyway, as a stub that always returns ENOSUP or perhaps EOPNOTSUPP.
+ # (No, I don't know why that's a good design.) ./configure will detect
+ # this and reject it--so HAVE_LCHMOD still won't be defined on such
+ # platforms. This is Very Helpful.
+ #
+ # However, sometimes platforms without a working lchmod() *do* have
+ # fchmodat(). (Examples: Linux kernel 3.2 with glibc 2.15,
+ # OpenIndiana 3.x.) And fchmodat() has a flag that theoretically makes
+ # it behave like lchmod(). So in theory it would be a suitable
+ # replacement for lchmod(). But when lchmod() doesn't work, fchmodat()'s
+ # flag doesn't work *either*. Sadly ./configure isn't sophisticated
+ # enough to detect this condition--it only determines whether or not
+ # fchmodat() minimally works.
+ #
+ # Therefore we simply ignore fchmodat() when deciding whether or not
+ # os.chmod supports follow_symlinks. Just checking lchmod() is
+ # sufficient. After all--if you have a working fchmodat(), your
+ # lchmod() almost certainly works too.
+ #
+ # _add("HAVE_FCHMODAT", "chmod")
+ _add("HAVE_FCHOWNAT", "chown")
+ _add("HAVE_FSTATAT", "stat")
+ _add("HAVE_LCHFLAGS", "chflags")
+ _add("HAVE_LCHMOD", "chmod")
+ if _exists("lchown"): # mac os x10.3
+ _add("HAVE_LCHOWN", "chown")
+ _add("HAVE_LINKAT", "link")
+ _add("HAVE_LUTIMES", "utime")
+ _add("HAVE_LSTAT", "stat")
+ _add("HAVE_FSTATAT", "stat")
+ _add("HAVE_UTIMENSAT", "utime")
+ _add("MS_WINDOWS", "stat")
+ supports_follow_symlinks = _set
+
+ del _set
+ del _have_functions
+ del _globals
+ del _add
+
+
# Python uses fixed values for the SEEK_ constants; they are mapped
# to native constants if necessary in posixmodule.c
+# Other possible SEEK values are directly imported from posixmodule.c
SEEK_SET = 0
SEEK_CUR = 1
SEEK_END = 2
@@ -120,8 +236,6 @@ def _get_masked_mode(mode):
umask(mask)
return mode & ~mask
-#'
-
# Super directory utilities.
# (Inspired by Eric Raymond; the doc strings are mostly his)
@@ -151,7 +265,6 @@ def makedirs(name, mode=0o777, exist_ok=False):
try:
mkdir(name, mode)
except OSError as e:
- import stat as st
dir_exists = path.isdir(name)
expected_mode = _get_masked_mode(mode)
if dir_exists:
@@ -163,6 +276,9 @@ def makedirs(name, mode=0o777, exist_ok=False):
actual_mode = -1
if not (e.errno == errno.EEXIST and exist_ok and dir_exists and
actual_mode == expected_mode):
+ if dir_exists and actual_mode != expected_mode:
+ e.strerror += ' (mode %o != expected mode %o)' % (
+ actual_mode, expected_mode)
raise
def removedirs(name):
@@ -300,13 +416,107 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
for name in dirs:
new_path = join(top, name)
if followlinks or not islink(new_path):
- for x in walk(new_path, topdown, onerror, followlinks):
- yield x
+ yield from walk(new_path, topdown, onerror, followlinks)
if not topdown:
yield top, dirs, nondirs
__all__.append("walk")
+if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd:
+
+ def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=None):
+ """Directory tree generator.
+
+ This behaves exactly like walk(), except that it yields a 4-tuple
+
+ dirpath, dirnames, filenames, dirfd
+
+ `dirpath`, `dirnames` and `filenames` are identical to walk() output,
+ and `dirfd` is a file descriptor referring to the directory `dirpath`.
+
+ The advantage of fwalk() over walk() is that it's safe against symlink
+ races (when follow_symlinks is False).
+
+ If dir_fd is not None, it should be a file descriptor open to a directory,
+ and top should be relative; top will then be relative to that directory.
+ (dir_fd is always supported for fwalk.)
+
+ Caution:
+ Since fwalk() yields file descriptors, those are only valid until the
+ next iteration step, so you should dup() them if you want to keep them
+ for a longer period.
+
+ Example:
+
+ import os
+ for root, dirs, files, rootfd in os.fwalk('python/Lib/email'):
+ print(root, "consumes", end="")
+ print(sum([os.stat(name, dir_fd=rootfd).st_size for name in files]),
+ end="")
+ print("bytes in", len(files), "non-directory files")
+ if 'CVS' in dirs:
+ dirs.remove('CVS') # don't visit CVS directories
+ """
+ # Note: To guard against symlink races, we use the standard
+ # lstat()/open()/fstat() trick.
+ orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
+ topfd = open(top, O_RDONLY, dir_fd=dir_fd)
+ try:
+ if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
+ path.samestat(orig_st, stat(topfd)))):
+ yield from _fwalk(topfd, top, topdown, onerror, follow_symlinks)
+ finally:
+ close(topfd)
+
+ def _fwalk(topfd, toppath, topdown, onerror, follow_symlinks):
+ # Note: This uses O(depth of the directory tree) file descriptors: if
+ # necessary, it can be adapted to only require O(1) FDs, see issue
+ # #13734.
+
+ names = listdir(topfd)
+ dirs, nondirs = [], []
+ for name in names:
+ try:
+ # Here, we don't use AT_SYMLINK_NOFOLLOW to be consistent with
+ # walk() which reports symlinks to directories as directories.
+ # We do however check for symlinks before recursing into
+ # a subdirectory.
+ if st.S_ISDIR(stat(name, dir_fd=topfd).st_mode):
+ dirs.append(name)
+ else:
+ nondirs.append(name)
+ except FileNotFoundError:
+ try:
+ # Add dangling symlinks, ignore disappeared files
+ if st.S_ISLNK(stat(name, dir_fd=topfd, follow_symlinks=False)
+ .st_mode):
+ nondirs.append(name)
+ except FileNotFoundError:
+ continue
+
+ if topdown:
+ yield toppath, dirs, nondirs, topfd
+
+ for name in dirs:
+ try:
+ orig_st = stat(name, dir_fd=topfd, follow_symlinks=follow_symlinks)
+ dirfd = open(name, O_RDONLY, dir_fd=topfd)
+ except error as err:
+ if onerror is not None:
+ onerror(err)
+ return
+ try:
+ if follow_symlinks or path.samestat(orig_st, stat(dirfd)):
+ dirpath = path.join(toppath, name)
+ yield from _fwalk(dirfd, dirpath, topdown, onerror, follow_symlinks)
+ finally:
+ close(dirfd)
+
+ if not topdown:
+ yield toppath, dirs, nondirs, topfd
+
+ __all__.append("fwalk")
+
# Make sure os.environ exists, at least
try:
environ
@@ -443,7 +653,7 @@ def get_exec_path(env=None):
# Change environ to automatically call putenv(), unsetenv if they exist.
-from _abcoll import MutableMapping # Can't use collections (bootstrap)
+from collections.abc import MutableMapping
class _Environ(MutableMapping):
def __init__(self, data, encodekey, decodekey, encodevalue, decodevalue, putenv, unsetenv):
@@ -607,15 +817,14 @@ def _fscodec():
fsencode, fsdecode = _fscodec()
del _fscodec
-def _exists(name):
- return name in globals()
-
# Supply spawn*() (probably only for Unix)
if _exists("fork") and not _exists("spawnv") and _exists("execv"):
P_WAIT = 0
P_NOWAIT = P_NOWAITO = 1
+ __all__.extend(["P_WAIT", "P_NOWAIT", "P_NOWAITO"])
+
# XXX Should we support P_DETACH? I suppose it could fork()**2
# and close the std I/O streams. Also, P_OVERLAY is the same
# as execv*()?
@@ -776,7 +985,7 @@ def popen(cmd, mode="r", buffering=-1):
raise TypeError("invalid cmd type (%s, expected string)" % type(cmd))
if mode not in ("r", "w"):
raise ValueError("invalid mode %r" % mode)
- if buffering == 0 or buffering == None:
+ if buffering == 0 or buffering is None:
raise ValueError("popen() does not support unbuffered streams")
import subprocess, io
if mode == "r":
diff --git a/Lib/pdb.py b/Lib/pdb.py
index 6776a3f..3043391 100755
--- a/Lib/pdb.py
+++ b/Lib/pdb.py
@@ -73,6 +73,7 @@ import cmd
import bdb
import dis
import code
+import glob
import pprint
import signal
import inspect
@@ -155,6 +156,8 @@ class Pdb(bdb.Bdb, cmd.Cmd):
# Try to load readline if it exists
try:
import readline
+ # remove some common file name delimiters
+ readline.set_completer_delims(' \t\n`@#$%^&*()=+[{]}\\|;:\'",<>?')
except ImportError:
pass
self.allow_kbdint = False
@@ -445,6 +448,61 @@ class Pdb(bdb.Bdb, cmd.Cmd):
def error(self, msg):
print('***', msg, file=self.stdout)
+ # Generic completion functions. Individual complete_foo methods can be
+ # assigned below to one of these functions.
+
+ def _complete_location(self, text, line, begidx, endidx):
+ # Complete a file/module/function location for break/tbreak/clear.
+ if line.strip().endswith((':', ',')):
+ # Here comes a line number or a condition which we can't complete.
+ return []
+ # First, try to find matching functions (i.e. expressions).
+ try:
+ ret = self._complete_expression(text, line, begidx, endidx)
+ except Exception:
+ ret = []
+ # Then, try to complete file names as well.
+ globs = glob.glob(text + '*')
+ for fn in globs:
+ if os.path.isdir(fn):
+ ret.append(fn + '/')
+ elif os.path.isfile(fn) and fn.lower().endswith(('.py', '.pyw')):
+ ret.append(fn + ':')
+ return ret
+
+ def _complete_bpnumber(self, text, line, begidx, endidx):
+ # Complete a breakpoint number. (This would be more helpful if we could
+ # display additional info along with the completions, such as file/line
+ # of the breakpoint.)
+ return [str(i) for i, bp in enumerate(bdb.Breakpoint.bpbynumber)
+ if bp is not None and str(i).startswith(text)]
+
+ def _complete_expression(self, text, line, begidx, endidx):
+ # Complete an arbitrary expression.
+ if not self.curframe:
+ return []
+ # Collect globals and locals. It is usually not really sensible to also
+ # complete builtins, and they clutter the namespace quite heavily, so we
+ # leave them out.
+ ns = self.curframe.f_globals.copy()
+ ns.update(self.curframe_locals)
+ if '.' in text:
+ # Walk an attribute chain up to the last part, similar to what
+ # rlcompleter does. This will bail if any of the parts are not
+ # simple attribute access, which is what we want.
+ dotted = text.split('.')
+ try:
+ obj = ns[dotted[0]]
+ for part in dotted[1:-1]:
+ obj = getattr(obj, part)
+ except (KeyError, AttributeError):
+ return []
+ prefix = '.'.join(dotted[:-1]) + '.'
+ return [prefix + n for n in dir(obj) if n.startswith(dotted[-1])]
+ else:
+ # Complete a simple name.
+ return [n for n in ns.keys() if n.startswith(text)]
+
# Command definitions, called by cmdloop()
# The argument is the remaining string on the command line
# Return true to exit from the command loop
@@ -526,6 +584,8 @@ class Pdb(bdb.Bdb, cmd.Cmd):
self.commands_defining = False
self.prompt = prompt_back
+ complete_commands = _complete_bpnumber
+
def do_break(self, arg, temporary = 0):
"""b(reak) [ ([filename:]lineno | function) [, condition] ]
Without argument, list all breaks.
@@ -628,6 +688,9 @@ class Pdb(bdb.Bdb, cmd.Cmd):
do_b = do_break
+ complete_break = _complete_location
+ complete_b = _complete_location
+
def do_tbreak(self, arg):
"""tbreak [ ([filename:]lineno | function) [, condition] ]
Same arguments as break, but sets a temporary breakpoint: it
@@ -635,6 +698,8 @@ class Pdb(bdb.Bdb, cmd.Cmd):
"""
self.do_break(arg, 1)
+ complete_tbreak = _complete_location
+
def lineinfo(self, identifier):
failed = (None, None, None)
# Input is identifier, may be in single quotes
@@ -704,6 +769,8 @@ class Pdb(bdb.Bdb, cmd.Cmd):
bp.enable()
self.message('Enabled %s' % bp)
+ complete_enable = _complete_bpnumber
+
def do_disable(self, arg):
"""disable bpnumber [bpnumber ...]
Disables the breakpoints given as a space separated list of
@@ -722,6 +789,8 @@ class Pdb(bdb.Bdb, cmd.Cmd):
bp.disable()
self.message('Disabled %s' % bp)
+ complete_disable = _complete_bpnumber
+
def do_condition(self, arg):
"""condition bpnumber [condition]
Set a new condition for the breakpoint, an expression which
@@ -745,6 +814,8 @@ class Pdb(bdb.Bdb, cmd.Cmd):
else:
self.message('New condition set for breakpoint %d.' % bp.number)
+ complete_condition = _complete_bpnumber
+
def do_ignore(self, arg):
"""ignore bpnumber [count]
Set the ignore count for the given breakpoint number. If
@@ -776,6 +847,8 @@ class Pdb(bdb.Bdb, cmd.Cmd):
self.message('Will stop next time breakpoint %d is reached.'
% bp.number)
+ complete_ignore = _complete_bpnumber
+
def do_clear(self, arg):
"""cl(ear) filename:lineno\ncl(ear) [bpnumber [bpnumber...]]
With a space separated list of breakpoint numbers, clear
@@ -824,6 +897,9 @@ class Pdb(bdb.Bdb, cmd.Cmd):
self.message('Deleted %s' % bp)
do_cl = do_clear # 'c' is already an abbreviation for 'continue'
+ complete_clear = _complete_location
+ complete_cl = _complete_location
+
def do_where(self, arg):
"""w(here)
Print a stack trace, with the most recent frame at the bottom.
@@ -1007,6 +1083,8 @@ class Pdb(bdb.Bdb, cmd.Cmd):
sys.settrace(self.trace_dispatch)
self.lastcmd = p.lastcmd
+ complete_debug = _complete_expression
+
def do_quit(self, arg):
"""q(uit)\nexit
Quit from the debugger. The program being executed is aborted.
@@ -1093,6 +1171,10 @@ class Pdb(bdb.Bdb, cmd.Cmd):
except:
pass
+ complete_print = _complete_expression
+ complete_p = _complete_expression
+ complete_pp = _complete_expression
+
def do_list(self, arg):
"""l(ist) [first [,last] | .]
@@ -1173,6 +1255,8 @@ class Pdb(bdb.Bdb, cmd.Cmd):
return
self._print_lines(lines, lineno)
+ complete_source = _complete_expression
+
def _print_lines(self, lines, start, breaks=(), frame=None):
"""Print a range of lines."""
if frame:
@@ -1227,6 +1311,8 @@ class Pdb(bdb.Bdb, cmd.Cmd):
# None of the above...
self.message(type(value))
+ complete_whatis = _complete_expression
+
def do_display(self, arg):
"""display [expression]
@@ -1244,6 +1330,8 @@ class Pdb(bdb.Bdb, cmd.Cmd):
self.displaying.setdefault(self.curframe, {})[arg] = val
self.message('display %s: %r' % (arg, val))
+ complete_display = _complete_expression
+
def do_undisplay(self, arg):
"""undisplay [expression]
@@ -1259,6 +1347,10 @@ class Pdb(bdb.Bdb, cmd.Cmd):
else:
self.displaying.pop(self.curframe, None)
+ def complete_undisplay(self, text, line, begidx, endidx):
+ return [e for e in self.displaying.get(self.curframe, {})
+ if e.startswith(text)]
+
def do_interact(self, arg):
"""interact
@@ -1313,6 +1405,9 @@ class Pdb(bdb.Bdb, cmd.Cmd):
if args[0] in self.aliases:
del self.aliases[args[0]]
+ def complete_unalias(self, text, line, begidx, endidx):
+ return [a for a in self.aliases if a.startswith(text)]
+
# List of all the commands making the program resume execution.
commands_resuming = ['do_continue', 'do_step', 'do_next', 'do_return',
'do_quit', 'do_jump']
diff --git a/Lib/pickle.py b/Lib/pickle.py
index d10ac776..9e65368 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -23,8 +23,6 @@ Misc variables:
"""
-__version__ = "$Revision$" # Code version
-
from types import FunctionType, BuiltinFunctionType
from copyreg import dispatch_table
from copyreg import _extension_registry, _inverted_registry, _extension_cache
@@ -299,8 +297,8 @@ class _Pickler:
f(self, obj) # Call unbound method with explicit self
return
- # Check copyreg.dispatch_table
- reduce = dispatch_table.get(t)
+ # Check private dispatch table if any, or else copyreg.dispatch_table
+ reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
if reduce:
rv = reduce(obj)
else:
@@ -377,7 +375,7 @@ class _Pickler:
# allowing protocol 0 and 1 to work normally. For this to
# work, the function returned by __reduce__ should be
# called __newobj__, and its first argument should be a
- # new-style class. The implementation for __newobj__
+ # class. The implementation for __newobj__
# should be as follows, although pickle has no way to
# verify this:
#
@@ -440,6 +438,14 @@ class _Pickler:
self.write(NONE)
dispatch[type(None)] = save_none
+ def save_ellipsis(self, obj):
+ self.save_global(Ellipsis, 'Ellipsis')
+ dispatch[type(Ellipsis)] = save_ellipsis
+
+ def save_notimplemented(self, obj):
+ self.save_global(NotImplemented, 'NotImplemented')
+ dispatch[type(NotImplemented)] = save_notimplemented
+
def save_bool(self, obj):
if self.proto >= 2:
self.write(obj and NEWTRUE or NEWFALSE)
@@ -1332,7 +1338,7 @@ def _test():
return doctest.testmod()
if __name__ == "__main__":
- import sys, argparse
+ import argparse
parser = argparse.ArgumentParser(
description='display contents of the pickle files')
parser.add_argument(
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index ec6cc53..66f4edd 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -510,10 +510,7 @@ def read_decimalnl_short(f):
elif s == b"01":
return True
- try:
- return int(s)
- except OverflowError:
- return int(s)
+ return int(s)
def read_decimalnl_long(f):
r"""
@@ -1642,6 +1639,8 @@ opcodes = [
is pushed on the stack.
NOTE: checks for __safe_for_unpickling__ went away in Python 2.3.
+ NOTE: the distinction between old-style and new-style classes does
+ not make sense in Python 3.
"""),
I(name='OBJ',
diff --git a/Lib/pipes.py b/Lib/pipes.py
index 4297053..f1a16f6 100644
--- a/Lib/pipes.py
+++ b/Lib/pipes.py
@@ -60,7 +60,9 @@ To create a new template object initialized to a given one:
import re
import os
import tempfile
-import string
+# we import the quote function rather than the module for backward compat
+# (quote used to be an undocumented but used function in pipes)
+from shlex import quote
__all__ = ["Template"]
@@ -243,22 +245,3 @@ def makepipeline(infile, steps, outfile):
cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
#
return cmdlist
-
-
-# Reliably quote a string as a single argument for /bin/sh
-
-# Safe unquoted
-_safechars = frozenset(string.ascii_letters + string.digits + '@%_-+=:,./')
-
-def quote(file):
- """Return a shell-escaped version of the file string."""
- for c in file:
- if c not in _safechars:
- break
- else:
- if not file:
- return "''"
- return file
- # use single quotes, and put single quotes into double quotes
- # the string $'b is then quoted as '$'"'"'b'
- return "'" + file.replace("'", "'\"'\"'") + "'"
diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py
index 51da0b1..8bdeb32 100644
--- a/Lib/pkgutil.py
+++ b/Lib/pkgutil.py
@@ -2,8 +2,10 @@
import os
import sys
+import importlib
import imp
import os.path
+from warnings import warn
from types import ModuleType
__all__ = [
@@ -21,7 +23,7 @@ def read_code(stream):
if magic != imp.get_magic():
return None
- stream.read(4) # Skip timestamp
+ stream.read(8) # Skip timestamp and size
return marshal.load(stream)
@@ -155,6 +157,49 @@ def iter_importer_modules(importer, prefix=''):
iter_importer_modules = simplegeneric(iter_importer_modules)
+# Implement a file walker for the normal importlib path hook
+def _iter_file_finder_modules(importer, prefix=''):
+ if importer.path is None or not os.path.isdir(importer.path):
+ return
+
+ yielded = {}
+ import inspect
+ try:
+ filenames = os.listdir(importer.path)
+ except OSError:
+ # ignore unreadable directories like import does
+ filenames = []
+ filenames.sort() # handle packages before same-named modules
+
+ for fn in filenames:
+ modname = inspect.getmodulename(fn)
+ if modname=='__init__' or modname in yielded:
+ continue
+
+ path = os.path.join(importer.path, fn)
+ ispkg = False
+
+ if not modname and os.path.isdir(path) and '.' not in fn:
+ modname = fn
+ try:
+ dircontents = os.listdir(path)
+ except OSError:
+ # ignore unreadable directories like import does
+ dircontents = []
+ for fn in dircontents:
+ subname = inspect.getmodulename(fn)
+ if subname=='__init__':
+ ispkg = True
+ break
+ else:
+ continue # not a package
+
+ if modname and '.' not in modname:
+ yielded[modname] = 1
+ yield prefix + modname, ispkg
+
+iter_importer_modules.register(
+ importlib.machinery.FileFinder, _iter_file_finder_modules)
class ImpImporter:
"""PEP 302 Importer that wraps Python's "classic" import algorithm
@@ -168,6 +213,8 @@ class ImpImporter:
"""
def __init__(self, path=None):
+ warn("This emulation is deprecated, use 'importlib' instead",
+ DeprecationWarning)
self.path = path
def find_module(self, fullname, path=None):
@@ -232,6 +279,8 @@ class ImpLoader:
code = source = None
def __init__(self, fullname, file, filename, etc):
+ warn("This emulation is deprecated, use 'importlib' instead",
+ DeprecationWarning)
self.file = file
self.filename = filename
self.fullname = fullname
@@ -256,7 +305,7 @@ class ImpLoader:
if self.file and self.file.closed:
mod_type = self.etc[2]
if mod_type==imp.PY_SOURCE:
- self.file = open(self.filename, 'rU')
+ self.file = open(self.filename, 'r')
elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION):
self.file = open(self.filename, 'rb')
@@ -301,7 +350,7 @@ class ImpLoader:
self.file.close()
elif mod_type==imp.PY_COMPILED:
if os.path.exists(self.filename[:-1]):
- f = open(self.filename[:-1], 'rU')
+ f = open(self.filename[:-1], 'r')
self.source = f.read()
f.close()
elif mod_type==imp.PKG_DIRECTORY:
@@ -315,9 +364,9 @@ class ImpLoader:
def get_filename(self, fullname=None):
fullname = self._fix_name(fullname)
mod_type = self.etc[2]
- if self.etc[2]==imp.PKG_DIRECTORY:
+ if mod_type==imp.PKG_DIRECTORY:
return self._get_delegate().get_filename()
- elif self.etc[2] in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
+ elif mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
return self.filename
return None
@@ -366,10 +415,6 @@ def get_importer(path_item):
The returned importer is cached in sys.path_importer_cache
if it was newly created by a path hook.
- If there is no importer, a wrapper around the basic import
- machinery is returned. This wrapper is never inserted into
- the importer cache (None is inserted instead).
-
The cache (or part of it) can be cleared manually if a
rescan of sys.path_hooks is necessary.
"""
@@ -379,18 +424,12 @@ def get_importer(path_item):
for path_hook in sys.path_hooks:
try:
importer = path_hook(path_item)
+ sys.path_importer_cache.setdefault(path_item, importer)
break
except ImportError:
pass
else:
importer = None
- sys.path_importer_cache.setdefault(path_item, importer)
-
- if importer is None:
- try:
- importer = ImpImporter(path_item)
- except ImportError:
- importer = None
return importer
@@ -398,55 +437,37 @@ def iter_importers(fullname=""):
"""Yield PEP 302 importers for the given module name
If fullname contains a '.', the importers will be for the package
- containing fullname, otherwise they will be importers for sys.meta_path,
- sys.path, and Python's "classic" import machinery, in that order. If
- the named module is in a package, that package is imported as a side
- effect of invoking this function.
-
- Non PEP 302 mechanisms (e.g. the Windows registry) used by the
- standard import machinery to find files in alternative locations
- are partially supported, but are searched AFTER sys.path. Normally,
- these locations are searched BEFORE sys.path, preventing sys.path
- entries from shadowing them.
+ containing fullname, otherwise they will be all registered top level
+ importers (i.e. those on both sys.meta_path and sys.path_hooks).
- For this to cause a visible difference in behaviour, there must
- be a module or package name that is accessible via both sys.path
- and one of the non PEP 302 file system mechanisms. In this case,
- the emulation will find the former version, while the builtin
- import mechanism will find the latter.
+ If the named module is in a package, that package is imported as a side
+ effect of invoking this function.
- Items of the following types can be affected by this discrepancy:
- imp.C_EXTENSION, imp.PY_SOURCE, imp.PY_COMPILED, imp.PKG_DIRECTORY
+ If no module name is specified, all top level importers are produced.
"""
if fullname.startswith('.'):
- raise ImportError("Relative module names not supported")
+ msg = "Relative module name {!r} not supported".format(fullname)
+ raise ImportError(msg)
if '.' in fullname:
# Get the containing package's __path__
- pkg = '.'.join(fullname.split('.')[:-1])
- if pkg not in sys.modules:
- __import__(pkg)
- path = getattr(sys.modules[pkg], '__path__', None) or []
+ pkg_name = fullname.rpartition(".")[0]
+ pkg = importlib.import_module(pkg)
+ path = getattr(sys.modules[pkg], '__path__', None)
+ if path is None:
+ return
else:
for importer in sys.meta_path:
yield importer
path = sys.path
for item in path:
yield get_importer(item)
- if '.' not in fullname:
- yield ImpImporter()
def get_loader(module_or_name):
"""Get a PEP 302 "loader" object for module_or_name
- If the module or package is accessible via the normal import
- mechanism, a wrapper around the relevant part of that machinery
- is returned. Returns None if the module cannot be found or imported.
+ Returns None if the module cannot be found or imported.
If the named module is not already imported, its containing package
(if any) is imported, in order to establish the package __path__.
-
- This function uses iter_importers(), and is thus subject to the same
- limitations regarding platform-specific special import locations such
- as the Windows registry.
"""
if module_or_name in sys.modules:
module_or_name = sys.modules[module_or_name]
@@ -460,20 +481,33 @@ def get_loader(module_or_name):
fullname = module_or_name
return find_loader(fullname)
+
def find_loader(fullname):
"""Find a PEP 302 "loader" object for fullname
- If fullname contains dots, path must be the containing package's __path__.
- Returns None if the module cannot be found or imported. This function uses
- iter_importers(), and is thus subject to the same limitations regarding
- platform-specific special import locations such as the Windows registry.
+ This is a convenience wrapper around :func:`importlib.find_loader` that
+ sets the *path* argument correctly when searching for submodules, and
+ also ensures parent packages (if any) are imported before searching for
+ submodules.
"""
- for importer in iter_importers(fullname):
- loader = importer.find_module(fullname)
- if loader is not None:
- return loader
-
- return None
+ if fullname.startswith('.'):
+ msg = "Relative module name {!r} not supported".format(fullname)
+ raise ImportError(msg)
+ path = None
+ pkg_name = fullname.rpartition(".")[0]
+ if pkg_name:
+ pkg = importlib.import_module(pkg_name)
+ path = getattr(pkg, "__path__", None)
+ if path is None:
+ return None
+ try:
+ return importlib.find_loader(fullname, path)
+ except (ImportError, AttributeError, TypeError, ValueError) as ex:
+ # This hack fixes an impedance mismatch between pkgutil and
+ # importlib, where the latter throws other errors for cases where
+ # pkgutil previously threw ImportError
+ msg = "Error while finding loader for {!r} ({}: {})"
+ raise ImportError(msg.format(fullname, type(ex), ex)) from ex
def extend_path(path, name):
@@ -514,21 +548,41 @@ def extend_path(path, name):
# frozen package. Return the path unchanged in that case.
return path
- pname = os.path.join(*name.split('.')) # Reconstitute as relative path
sname_pkg = name + ".pkg"
- init_py = "__init__.py"
path = path[:] # Start with a copy of the existing path
- for dir in sys.path:
- if not isinstance(dir, str) or not os.path.isdir(dir):
+ parent_package, _, final_name = name.rpartition('.')
+ if parent_package:
+ try:
+ search_path = sys.modules[parent_package].__path__
+ except (KeyError, AttributeError):
+ # We can't do anything: find_loader() returns None when
+ # passed a dotted name.
+ return path
+ else:
+ search_path = sys.path
+
+ for dir in search_path:
+ if not isinstance(dir, str):
continue
- subdir = os.path.join(dir, pname)
- # XXX This may still add duplicate entries to path on
- # case-insensitive filesystems
- initfile = os.path.join(subdir, init_py)
- if subdir not in path and os.path.isfile(initfile):
- path.append(subdir)
+
+ finder = get_importer(dir)
+ if finder is not None:
+ # Is this finder PEP 420 compliant?
+ if hasattr(finder, 'find_loader'):
+ loader, portions = finder.find_loader(final_name)
+ else:
+ # No, no need to call it
+ loader = None
+ portions = []
+
+ for portion in portions:
+ # XXX This may still add duplicate entries to path on
+ # case-insensitive filesystems
+ if portion not in path:
+ path.append(portion)
+
# XXX Is this the right thing for subpackages like zope.app?
# It looks for a file named "zope.app.pkg"
pkgfile = os.path.join(dir, sname_pkg)
diff --git a/Lib/plat-linux2/CDROM.py b/Lib/plat-linux/CDROM.py
index 4340936..4340936 100644
--- a/Lib/plat-linux2/CDROM.py
+++ b/Lib/plat-linux/CDROM.py
diff --git a/Lib/plat-linux2/DLFCN.py b/Lib/plat-linux/DLFCN.py
index dd10ac4..dd10ac4 100644
--- a/Lib/plat-linux2/DLFCN.py
+++ b/Lib/plat-linux/DLFCN.py
diff --git a/Lib/plat-linux2/IN.py b/Lib/plat-linux/IN.py
index d7d3002..d7d3002 100644
--- a/Lib/plat-linux2/IN.py
+++ b/Lib/plat-linux/IN.py
diff --git a/Lib/plat-linux2/TYPES.py b/Lib/plat-linux/TYPES.py
index e7a324b..e7a324b 100644
--- a/Lib/plat-linux2/TYPES.py
+++ b/Lib/plat-linux/TYPES.py
diff --git a/Lib/plat-linux2/regen b/Lib/plat-linux/regen
index c76950e..c76950e 100755
--- a/Lib/plat-linux2/regen
+++ b/Lib/plat-linux/regen
diff --git a/Lib/platform.py b/Lib/platform.py
index 686a045..2b8a24a 100755
--- a/Lib/platform.py
+++ b/Lib/platform.py
@@ -111,6 +111,7 @@ __copyright__ = """
__version__ = '1.0.7'
+import collections
import sys, os, re, subprocess
### Globals & Constants
@@ -130,15 +131,15 @@ except AttributeError:
### Platform specific APIs
-_libc_search = re.compile(r'(__libc_init)'
- '|'
- '(GLIBC_([0-9.]+))'
- '|'
- '(libc(_\w+)?\.so(?:\.(\d[0-9.]*))?)', re.ASCII)
+_libc_search = re.compile(b'(__libc_init)'
+ b'|'
+ b'(GLIBC_([0-9.]+))'
+ b'|'
+ br'(libc(_\w+)?\.so(?:\.(\d[0-9.]*))?)', re.ASCII)
def libc_ver(executable=sys.executable,lib='',version='',
- chunksize=2048):
+ chunksize=16384):
""" Tries to determine the libc version that the file executable
(which defaults to the Python interpreter) is linked against.
@@ -159,17 +160,22 @@ def libc_ver(executable=sys.executable,lib='',version='',
# able to open symlinks for reading
executable = os.path.realpath(executable)
f = open(executable,'rb')
- binary = f.read(chunksize).decode('latin-1')
+ binary = f.read(chunksize)
pos = 0
while 1:
- m = _libc_search.search(binary,pos)
+ if b'libc' in binary or b'GLIBC' in binary:
+ m = _libc_search.search(binary,pos)
+ else:
+ m = None
if not m:
- binary = f.read(chunksize).decode('latin-1')
+ binary = f.read(chunksize)
if not binary:
break
pos = 0
continue
- libcinit,glibc,glibcversion,so,threads,soversion = m.groups()
+ libcinit,glibc,glibcversion,so,threads,soversion = [
+ s.decode('latin1') if s is not None else s
+ for s in m.groups()]
if libcinit and not lib:
lib = 'libc'
elif glibc:
@@ -255,7 +261,7 @@ _release_version = re.compile(r'([^0-9]+)'
_supported_dists = (
'SuSE', 'debian', 'fedora', 'redhat', 'centos',
'mandrake', 'mandriva', 'rocks', 'slackware', 'yellowdog', 'gentoo',
- 'UnitedLinux', 'turbolinux')
+ 'UnitedLinux', 'turbolinux', 'arch', 'mageia')
def _parse_release_file(firstline):
@@ -357,92 +363,13 @@ def dist(distname='',version='',id='',
supported_dists=supported_dists,
full_distribution_name=0)
-class _popen:
-
- """ Fairly portable (alternative) popen implementation.
-
- This is mostly needed in case os.popen() is not available, or
- doesn't work as advertised, e.g. in Win9X GUI programs like
- PythonWin or IDLE.
-
- Writing to the pipe is currently not supported.
-
- """
- tmpfile = ''
- pipe = None
- bufsize = None
- mode = 'r'
-
- def __init__(self,cmd,mode='r',bufsize=None):
-
- if mode != 'r':
- raise ValueError('popen()-emulation only supports read mode')
- import tempfile
- self.tmpfile = tmpfile = tempfile.mktemp()
- os.system(cmd + ' > %s' % tmpfile)
- self.pipe = open(tmpfile,'rb')
- self.bufsize = bufsize
- self.mode = mode
-
- def read(self):
-
- return self.pipe.read()
-
- def readlines(self):
-
- if self.bufsize is not None:
- return self.pipe.readlines()
-
- def close(self,
-
- remove=os.unlink,error=os.error):
-
- if self.pipe:
- rc = self.pipe.close()
- else:
- rc = 255
- if self.tmpfile:
- try:
- remove(self.tmpfile)
- except error:
- pass
- return rc
-
- # Alias
- __del__ = close
-
def popen(cmd, mode='r', bufsize=-1):
""" Portable popen() interface.
"""
- # Find a working popen implementation preferring win32pipe.popen
- # over os.popen over _popen
- popen = None
- if os.environ.get('OS','') == 'Windows_NT':
- # On NT win32pipe should work; on Win9x it hangs due to bugs
- # in the MS C lib (see MS KnowledgeBase article Q150956)
- try:
- import win32pipe
- except ImportError:
- pass
- else:
- popen = win32pipe.popen
- if popen is None:
- if hasattr(os,'popen'):
- popen = os.popen
- # Check whether it works... it doesn't in GUI programs
- # on Windows platforms
- if sys.platform == 'win32': # XXX Others too ?
- try:
- popen('')
- except os.error:
- popen = _popen
- else:
- popen = _popen
- if bufsize is None:
- return popen(cmd,mode)
- else:
- return popen(cmd,mode,bufsize)
+ import warnings
+ warnings.warn('use os.popen instead', DeprecationWarning, stacklevel=2)
+ return os.popen(cmd, mode, bufsize)
def _norm_version(version, build=''):
@@ -779,7 +706,7 @@ def _mac_ver_xml():
pl = plistlib.readPlist(fn)
release = pl['ProductVersion']
versioninfo=('', '', '')
- machine = os.uname()[4]
+ machine = os.uname().machine
if machine in ('ppc', 'Power Macintosh'):
# for compatibility with the gestalt based code
machine = 'PowerPC'
@@ -1004,9 +931,10 @@ def _syscmd_file(target,default=''):
try:
proc = subprocess.Popen(['file', target],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
except (AttributeError,os.error):
return default
- output = proc.communicate()[0].decode("latin-1")
+ output = proc.communicate()[0].decode('latin-1')
rc = proc.wait()
if not output or rc:
return default
@@ -1107,6 +1035,9 @@ def architecture(executable=sys.executable,bits='',linkage=''):
### Portable uname() interface
+uname_result = collections.namedtuple("uname_result",
+ "system node release version machine processor")
+
_uname_cache = None
def uname():
@@ -1241,7 +1172,7 @@ def uname():
system = 'Windows'
release = 'Vista'
- _uname_cache = system,node,release,version,machine,processor
+ _uname_cache = uname_result(system,node,release,version,machine,processor)
return _uname_cache
### Direct interfaces to some of the uname() return values
@@ -1253,7 +1184,7 @@ def system():
An empty string is returned if the value cannot be determined.
"""
- return uname()[0]
+ return uname().system
def node():
@@ -1263,7 +1194,7 @@ def node():
An empty string is returned if the value cannot be determined.
"""
- return uname()[1]
+ return uname().node
def release():
@@ -1272,7 +1203,7 @@ def release():
An empty string is returned if the value cannot be determined.
"""
- return uname()[2]
+ return uname().release
def version():
@@ -1281,7 +1212,7 @@ def version():
An empty string is returned if the value cannot be determined.
"""
- return uname()[3]
+ return uname().version
def machine():
@@ -1290,7 +1221,7 @@ def machine():
An empty string is returned if the value cannot be determined.
"""
- return uname()[4]
+ return uname().machine
def processor():
@@ -1302,7 +1233,7 @@ def processor():
e.g. NetBSD does this.
"""
- return uname()[5]
+ return uname().processor
### Various APIs for extracting information from sys.version
diff --git a/Lib/plistlib.py b/Lib/plistlib.py
index 2e7e512..41fd8f2 100644
--- a/Lib/plistlib.py
+++ b/Lib/plistlib.py
@@ -237,20 +237,26 @@ class PlistWriter(DumbXMLWriter):
self.endElement("data")
def writeDict(self, d):
- self.beginElement("dict")
- items = sorted(d.items())
- for key, value in items:
- if not isinstance(key, str):
- raise TypeError("keys must be strings")
- self.simpleElement("key", key)
- self.writeValue(value)
- self.endElement("dict")
+ if d:
+ self.beginElement("dict")
+ items = sorted(d.items())
+ for key, value in items:
+ if not isinstance(key, str):
+ raise TypeError("keys must be strings")
+ self.simpleElement("key", key)
+ self.writeValue(value)
+ self.endElement("dict")
+ else:
+ self.simpleElement("dict")
def writeArray(self, array):
- self.beginElement("array")
- for value in array:
- self.writeValue(value)
- self.endElement("array")
+ if array:
+ self.beginElement("array")
+ for value in array:
+ self.writeValue(value)
+ self.endElement("array")
+ else:
+ self.simpleElement("array")
class _InternalDict(dict):
@@ -266,13 +272,13 @@ class _InternalDict(dict):
raise AttributeError(attr)
from warnings import warn
warn("Attribute access from plist dicts is deprecated, use d[key] "
- "notation instead", PendingDeprecationWarning, 2)
+ "notation instead", DeprecationWarning, 2)
return value
def __setattr__(self, attr, value):
from warnings import warn
warn("Attribute access from plist dicts is deprecated, use d[key] "
- "notation instead", PendingDeprecationWarning, 2)
+ "notation instead", DeprecationWarning, 2)
self[attr] = value
def __delattr__(self, attr):
@@ -282,14 +288,14 @@ class _InternalDict(dict):
raise AttributeError(attr)
from warnings import warn
warn("Attribute access from plist dicts is deprecated, use d[key] "
- "notation instead", PendingDeprecationWarning, 2)
+ "notation instead", DeprecationWarning, 2)
class Dict(_InternalDict):
def __init__(self, **kwargs):
from warnings import warn
warn("The plistlib.Dict class is deprecated, use builtin dict instead",
- PendingDeprecationWarning, 2)
+ DeprecationWarning, 2)
super().__init__(**kwargs)
@@ -302,7 +308,7 @@ class Plist(_InternalDict):
def __init__(self, **kwargs):
from warnings import warn
warn("The Plist class is deprecated, use the readPlist() and "
- "writePlist() functions instead", PendingDeprecationWarning, 2)
+ "writePlist() functions instead", DeprecationWarning, 2)
super().__init__(**kwargs)
def fromFile(cls, pathOrFile):
diff --git a/Lib/poplib.py b/Lib/poplib.py
index 84ea88d..d42d9dd 100644
--- a/Lib/poplib.py
+++ b/Lib/poplib.py
@@ -250,15 +250,18 @@ class POP3:
def quit(self):
"""Signoff: commit changes on server, unlock mailbox, close connection."""
- try:
- resp = self._shortcmd('QUIT')
- except error_proto as val:
- resp = val
- self.file.close()
- self.sock.close()
- del self.file, self.sock
+ resp = self._shortcmd('QUIT')
+ self.close()
return resp
+ def close(self):
+ """Close the connection without assuming anything about it."""
+ if self.file is not None:
+ self.file.close()
+ if self.sock is not None:
+ self.sock.close()
+ self.file = self.sock = None
+
#__del__ = quit
diff --git a/Lib/posixpath.py b/Lib/posixpath.py
index 2e3625b..cb93796 100644
--- a/Lib/posixpath.py
+++ b/Lib/posixpath.py
@@ -88,7 +88,8 @@ def join(a, *p):
for s in (a, ) + p)
if valid_types:
# Must have a mixture of text and binary data
- raise TypeError("Can't mix strings and bytes in path components.")
+ raise TypeError("Can't mix strings and bytes in path "
+ "components.") from None
raise
return path
diff --git a/Lib/profile.py b/Lib/profile.py
index 297e32d..743e77d 100755
--- a/Lib/profile.py
+++ b/Lib/profile.py
@@ -83,26 +83,6 @@ def runctx(statement, globals, locals, filename=None, sort=-1):
else:
return prof.print_stats(sort)
-if hasattr(os, "times"):
- def _get_time_times(timer=os.times):
- t = timer()
- return t[0] + t[1]
-
-# Using getrusage(3) is better than clock(3) if available:
-# on some systems (e.g. FreeBSD), getrusage has a higher resolution
-# Furthermore, on a POSIX system, returns microseconds, which
-# wrap around after 36min.
-_has_res = 0
-try:
- import resource
- resgetrusage = lambda: resource.getrusage(resource.RUSAGE_SELF)
- def _get_time_resource(timer=resgetrusage):
- t = timer()
- return t[0] + t[1]
- _has_res = 1
-except ImportError:
- pass
-
class Profile:
"""Profiler class.
@@ -155,20 +135,8 @@ class Profile:
self.bias = bias # Materialize in local dict for lookup speed.
if not timer:
- if _has_res:
- self.timer = resgetrusage
- self.dispatcher = self.trace_dispatch
- self.get_time = _get_time_resource
- elif hasattr(time, 'clock'):
- self.timer = self.get_time = time.clock
- self.dispatcher = self.trace_dispatch_i
- elif hasattr(os, 'times'):
- self.timer = os.times
- self.dispatcher = self.trace_dispatch
- self.get_time = _get_time_times
- else:
- self.timer = self.get_time = time.time
- self.dispatcher = self.trace_dispatch_i
+ self.timer = self.get_time = time.process_time
+ self.dispatcher = self.trace_dispatch_i
else:
self.timer = timer
t = self.timer() # test out timer function
diff --git a/Lib/pstats.py b/Lib/pstats.py
index bfbaa41..6a77605 100644
--- a/Lib/pstats.py
+++ b/Lib/pstats.py
@@ -678,13 +678,14 @@ if __name__ == '__main__':
return stop
return None
- import sys
if len(sys.argv) > 1:
initprofile = sys.argv[1]
else:
initprofile = None
try:
browser = ProfileBrowser(initprofile)
+ for profile in sys.argv[2:]:
+ browser.do_add(profile)
print("Welcome to the profile statistics browser.", file=browser.stream)
browser.cmdloop()
print("Goodbye.", file=browser.stream)
diff --git a/Lib/py_compile.py b/Lib/py_compile.py
index 5adb70a..62d69ad 100644
--- a/Lib/py_compile.py
+++ b/Lib/py_compile.py
@@ -110,9 +110,11 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
"""
with tokenize.open(file) as f:
try:
- timestamp = int(os.fstat(f.fileno()).st_mtime)
+ st = os.fstat(f.fileno())
except AttributeError:
- timestamp = int(os.stat(file).st_mtime)
+ st = os.stat(file)
+ timestamp = int(st.st_mtime)
+ size = st.st_size & 0xFFFFFFFF
codestring = f.read()
try:
codeobject = builtins.compile(codestring, dfile or file, 'exec',
@@ -139,6 +141,7 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
with open(cfile, 'wb') as fc:
fc.write(b'\0\0\0\0')
wr_long(fc, timestamp)
+ wr_long(fc, size)
marshal.dump(codeobject, fc)
fc.flush()
fc.seek(0, 0)
diff --git a/Lib/pyclbr.py b/Lib/pyclbr.py
index 65e9fbe..9ec05ee 100644
--- a/Lib/pyclbr.py
+++ b/Lib/pyclbr.py
@@ -39,8 +39,10 @@ Instances of this class have the following instance variables:
lineno -- the line in the file on which the class statement occurred
"""
+import io
+import os
import sys
-import imp
+import importlib
import tokenize
from token import NAME, DEDENT, OP
from operator import itemgetter
@@ -135,19 +137,24 @@ def _readmodule(module, path, inpackage=None):
# Search the path for the module
f = None
if inpackage is not None:
- f, fname, (_s, _m, ty) = imp.find_module(module, path)
+ search_path = path
else:
- f, fname, (_s, _m, ty) = imp.find_module(module, path + sys.path)
- if ty == imp.PKG_DIRECTORY:
- dict['__path__'] = [fname]
- path = [fname] + path
- f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
+ search_path = path + sys.path
+ loader = importlib.find_loader(fullmodule, search_path)
+ fname = loader.get_filename(fullmodule)
_modules[fullmodule] = dict
- if ty != imp.PY_SOURCE:
+ if loader.is_package(fullmodule):
+ dict['__path__'] = [os.path.dirname(fname)]
+ try:
+ source = loader.get_source(fullmodule)
+ if source is None:
+ return dict
+ except (AttributeError, ImportError):
# not Python source, can't do anything with this module
- f.close()
return dict
+ f = io.StringIO(source)
+
stack = [] # stack of (class, indent) pairs
g = tokenize.generate_tokens(f.readline)
diff --git a/Lib/pydoc.py b/Lib/pydoc.py
index 37616fb..fa531e9 100755
--- a/Lib/pydoc.py
+++ b/Lib/pydoc.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""Generate Python documentation in HTML or text for interactive use.
-In the Python interpreter, do "from pydoc import help" to provide online
+In the Python interpreter, do "from pydoc import help" to provide
help. Calling help(thing) on a Python object documents the object.
Or, at the shell command line outside of Python:
@@ -22,11 +22,6 @@ Run "pydoc -b" to start an HTTP server on an arbitrary unused port and
open a Web browser to interactively browse documentation. The -p option
can be used with the -b option to explicitly specify the server port.
-For platforms without a command line, "pydoc -g" starts the HTTP server
-and also pops up a little window for controlling it. This option is
-deprecated, since the server can now be controlled directly from HTTP
-clients.
-
Run "pydoc -w <name>" to write out the HTML documentation for a module
to a file named "<name>.html".
@@ -42,7 +37,6 @@ __all__ = ['help']
__author__ = "Ka-Ping Yee <ping@lfw.org>"
__date__ = "26 February 2001"
-__version__ = "$Revision$"
__credits__ = """Guido van Rossum, for an excellent programming language.
Tommy Burnette, the original creator of manpy.
Paul Prescod, for all his work on onlinehelp.
@@ -59,6 +53,7 @@ Richard Chamberlain, for the first implementation of textdoc.
import builtins
import imp
+import importlib.machinery
import inspect
import io
import os
@@ -168,12 +163,12 @@ def _split_list(s, predicate):
def visiblename(name, all=None, obj=None):
"""Decide whether to show documentation on a variable."""
- # Certain special names are redundant.
- _hidden_names = ('__builtins__', '__doc__', '__file__', '__path__',
- '__module__', '__name__', '__slots__', '__package__',
- '__cached__', '__author__', '__credits__', '__date__',
- '__version__')
- if name in _hidden_names: return 0
+ # Certain special names are redundant or internal.
+ if name in {'__author__', '__builtins__', '__cached__', '__credits__',
+ '__date__', '__doc__', '__file__', '__initializing__',
+ '__loader__', '__module__', '__name__', '__package__',
+ '__path__', '__qualname__', '__slots__', '__version__'}:
+ return 0
# Private names are hidden, but special names are displayed.
if name.startswith('__') and name.endswith('__'): return 1
# Namedtuples have public fields and methods with a single leading underscore
@@ -226,20 +221,34 @@ def synopsis(filename, cache={}):
mtime = os.stat(filename).st_mtime
lastupdate, result = cache.get(filename, (None, None))
if lastupdate is None or lastupdate < mtime:
- info = inspect.getmoduleinfo(filename)
try:
file = tokenize.open(filename)
except IOError:
# module can't be opened, so skip it
return None
- if info and 'b' in info[2]: # binary modules have to be imported
- try: module = imp.load_module('__temp__', file, filename, info[1:])
- except: return None
+ binary_suffixes = importlib.machinery.BYTECODE_SUFFIXES[:]
+ binary_suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
+ if any(filename.endswith(x) for x in binary_suffixes):
+ # binary modules have to be imported
+ file.close()
+ if any(filename.endswith(x) for x in
+ importlib.machinery.BYTECODE_SUFFIXES):
+ loader = importlib.machinery.SourcelessFileLoader('__temp__',
+ filename)
+ else:
+ loader = importlib.machinery.ExtensionFileLoader('__temp__',
+ filename)
+ try:
+ module = loader.load_module('__temp__')
+ except:
+ return None
result = (module.__doc__ or '').splitlines()[0]
del sys.modules['__temp__']
- else: # text modules can be directly examined
+ else:
+ # text modules can be directly examined
result = source_synopsis(file)
file.close()
+
cache[filename] = (mtime, result)
return result
@@ -305,9 +314,8 @@ def safeimport(path, forceload=0, cache={}):
elif exc is SyntaxError:
# A SyntaxError occurred before we could execute the module.
raise ErrorDuringImport(value.filename, info)
- elif exc is ImportError and extract_tb(tb)[-1][2]=='safeimport':
- # The import error occurred directly in this function,
- # which means there is no such module in the path.
+ elif exc is ImportError and value.name == path:
+ # No such module in the path.
return None
else:
# Some other error occurred during the importing process.
@@ -361,7 +369,7 @@ class Doc:
docloc = os.environ.get("PYTHONDOCS", self.PYTHONDOCS)
- basedir = os.path.join(sys.exec_prefix, "lib",
+ basedir = os.path.join(sys.base_exec_prefix, "lib",
"python%d.%d" % sys.version_info[:2])
if (isinstance(object, type(os)) and
(object.__name__ in ('errno', 'exceptions', 'gc', 'imp',
@@ -963,6 +971,9 @@ class HTMLDoc(Doc):
modpkgs = []
if shadowed is None: shadowed = {}
for importer, name, ispkg in pkgutil.iter_modules([dir]):
+ if any((0xD800 <= ord(ch) <= 0xDFFF) for ch in name):
+ # ignore a module if its name contains a surrogate character
+ continue
modpkgs.append((name, '', ispkg, name in shadowed))
shadowed[name] = 1
@@ -1827,7 +1838,7 @@ has the same effect as typing a particular string at the help> prompt.
def intro(self):
self.output.write('''
-Welcome to Python %s! This is the online help utility.
+Welcome to Python %s! This is the interactive help utility.
If this is your first time using Python, you should definitely check out
the tutorial on the Internet at http://docs.python.org/%s/tutorial/.
@@ -2018,14 +2029,6 @@ class ModuleScanner:
if self.quit:
break
- # XXX Skipping this file is a workaround for a bug
- # that causes python to crash with a segfault.
- # http://bugs.python.org/issue9319
- #
- # TODO Remove this once the bug is fixed.
- if modname in {'test.badsyntax_pep3120', 'badsyntax_pep3120'}:
- continue
-
if key is None:
callback(None, modname, '')
else:
@@ -2037,7 +2040,7 @@ class ModuleScanner:
if hasattr(loader, 'get_source'):
try:
source = loader.get_source(modname)
- except UnicodeDecodeError:
+ except Exception:
if onerror:
onerror(modname)
continue
@@ -2074,272 +2077,6 @@ def apropos(key):
warnings.filterwarnings('ignore') # ignore problems during import
ModuleScanner().run(callback, key, onerror=onerror)
-# --------------------------------------------------- Web browser interface
-
-def serve(port, callback=None, completer=None):
- import http.server, email.message, select
-
- msg = 'the pydoc.serve() function is deprecated'
- warnings.warn(msg, DeprecationWarning, stacklevel=2)
-
- class DocHandler(http.server.BaseHTTPRequestHandler):
- def send_document(self, title, contents):
- try:
- self.send_response(200)
- self.send_header('Content-Type', 'text/html; charset=UTF-8')
- self.end_headers()
- self.wfile.write(html.page(title, contents).encode('utf-8'))
- except IOError: pass
-
- def do_GET(self):
- path = self.path
- if path[-5:] == '.html': path = path[:-5]
- if path[:1] == '/': path = path[1:]
- if path and path != '.':
- try:
- obj = locate(path, forceload=1)
- except ErrorDuringImport as value:
- self.send_document(path, html.escape(str(value)))
- return
- if obj:
- self.send_document(describe(obj), html.document(obj, path))
- else:
- self.send_document(path,
-'no Python documentation found for %s' % repr(path))
- else:
- heading = html.heading(
-'<big><big><strong>Python: Index of Modules</strong></big></big>',
-'#ffffff', '#7799ee')
- def bltinlink(name):
- return '<a href="%s.html">%s</a>' % (name, name)
- names = [x for x in sys.builtin_module_names if x != '__main__']
- contents = html.multicolumn(names, bltinlink)
- indices = ['<p>' + html.bigsection(
- 'Built-in Modules', '#ffffff', '#ee77aa', contents)]
-
- seen = {}
- for dir in sys.path:
- indices.append(html.index(dir, seen))
- contents = heading + ' '.join(indices) + '''<p align=right>
-<font color="#909090" face="helvetica, arial"><strong>
-pydoc</strong> by Ka-Ping Yee &lt;ping@lfw.org&gt;</font>'''
- self.send_document('Index of Modules', contents)
-
- def log_message(self, *args): pass
-
- class DocServer(http.server.HTTPServer):
- def __init__(self, port, callback):
- host = 'localhost'
- self.address = (host, port)
- self.url = 'http://%s:%d/' % (host, port)
- self.callback = callback
- self.base.__init__(self, self.address, self.handler)
-
- def serve_until_quit(self):
- import select
- self.quit = False
- while not self.quit:
- rd, wr, ex = select.select([self.socket.fileno()], [], [], 1)
- if rd: self.handle_request()
- self.server_close()
-
- def server_activate(self):
- self.base.server_activate(self)
- if self.callback: self.callback(self)
-
- DocServer.base = http.server.HTTPServer
- DocServer.handler = DocHandler
- DocHandler.MessageClass = email.message.Message
- try:
- try:
- DocServer(port, callback).serve_until_quit()
- except (KeyboardInterrupt, select.error):
- pass
- finally:
- if completer: completer()
-
-# ----------------------------------------------------- graphical interface
-
-def gui():
- """Graphical interface (starts Web server and pops up a control window)."""
-
- msg = ('the pydoc.gui() function and "pydoc -g" option are deprecated\n',
- 'use "pydoc.browse() function and "pydoc -b" option instead.')
- warnings.warn(msg, DeprecationWarning, stacklevel=2)
-
- class GUI:
- def __init__(self, window, port=7464):
- self.window = window
- self.server = None
- self.scanner = None
-
- import tkinter
- self.server_frm = tkinter.Frame(window)
- self.title_lbl = tkinter.Label(self.server_frm,
- text='Starting server...\n ')
- self.open_btn = tkinter.Button(self.server_frm,
- text='open browser', command=self.open, state='disabled')
- self.quit_btn = tkinter.Button(self.server_frm,
- text='quit serving', command=self.quit, state='disabled')
-
- self.search_frm = tkinter.Frame(window)
- self.search_lbl = tkinter.Label(self.search_frm, text='Search for')
- self.search_ent = tkinter.Entry(self.search_frm)
- self.search_ent.bind('<Return>', self.search)
- self.stop_btn = tkinter.Button(self.search_frm,
- text='stop', pady=0, command=self.stop, state='disabled')
- if sys.platform == 'win32':
- # Trying to hide and show this button crashes under Windows.
- self.stop_btn.pack(side='right')
-
- self.window.title('pydoc')
- self.window.protocol('WM_DELETE_WINDOW', self.quit)
- self.title_lbl.pack(side='top', fill='x')
- self.open_btn.pack(side='left', fill='x', expand=1)
- self.quit_btn.pack(side='right', fill='x', expand=1)
- self.server_frm.pack(side='top', fill='x')
-
- self.search_lbl.pack(side='left')
- self.search_ent.pack(side='right', fill='x', expand=1)
- self.search_frm.pack(side='top', fill='x')
- self.search_ent.focus_set()
-
- font = ('helvetica', sys.platform == 'win32' and 8 or 10)
- self.result_lst = tkinter.Listbox(window, font=font, height=6)
- self.result_lst.bind('<Button-1>', self.select)
- self.result_lst.bind('<Double-Button-1>', self.goto)
- self.result_scr = tkinter.Scrollbar(window,
- orient='vertical', command=self.result_lst.yview)
- self.result_lst.config(yscrollcommand=self.result_scr.set)
-
- self.result_frm = tkinter.Frame(window)
- self.goto_btn = tkinter.Button(self.result_frm,
- text='go to selected', command=self.goto)
- self.hide_btn = tkinter.Button(self.result_frm,
- text='hide results', command=self.hide)
- self.goto_btn.pack(side='left', fill='x', expand=1)
- self.hide_btn.pack(side='right', fill='x', expand=1)
-
- self.window.update()
- self.minwidth = self.window.winfo_width()
- self.minheight = self.window.winfo_height()
- self.bigminheight = (self.server_frm.winfo_reqheight() +
- self.search_frm.winfo_reqheight() +
- self.result_lst.winfo_reqheight() +
- self.result_frm.winfo_reqheight())
- self.bigwidth, self.bigheight = self.minwidth, self.bigminheight
- self.expanded = 0
- self.window.wm_geometry('%dx%d' % (self.minwidth, self.minheight))
- self.window.wm_minsize(self.minwidth, self.minheight)
- self.window.tk.willdispatch()
-
- import threading
- threading.Thread(
- target=serve, args=(port, self.ready, self.quit)).start()
-
- def ready(self, server):
- self.server = server
- self.title_lbl.config(
- text='Python documentation server at\n' + server.url)
- self.open_btn.config(state='normal')
- self.quit_btn.config(state='normal')
-
- def open(self, event=None, url=None):
- url = url or self.server.url
- import webbrowser
- webbrowser.open(url)
-
- def quit(self, event=None):
- if self.server:
- self.server.quit = 1
- self.window.quit()
-
- def search(self, event=None):
- key = self.search_ent.get()
- self.stop_btn.pack(side='right')
- self.stop_btn.config(state='normal')
- self.search_lbl.config(text='Searching for "%s"...' % key)
- self.search_ent.forget()
- self.search_lbl.pack(side='left')
- self.result_lst.delete(0, 'end')
- self.goto_btn.config(state='disabled')
- self.expand()
-
- import threading
- if self.scanner:
- self.scanner.quit = 1
- self.scanner = ModuleScanner()
- threading.Thread(target=self.scanner.run,
- args=(self.update, key, self.done)).start()
-
- def update(self, path, modname, desc):
- if modname[-9:] == '.__init__':
- modname = modname[:-9] + ' (package)'
- self.result_lst.insert('end',
- modname + ' - ' + (desc or '(no description)'))
-
- def stop(self, event=None):
- if self.scanner:
- self.scanner.quit = 1
- self.scanner = None
-
- def done(self):
- self.scanner = None
- self.search_lbl.config(text='Search for')
- self.search_lbl.pack(side='left')
- self.search_ent.pack(side='right', fill='x', expand=1)
- if sys.platform != 'win32': self.stop_btn.forget()
- self.stop_btn.config(state='disabled')
-
- def select(self, event=None):
- self.goto_btn.config(state='normal')
-
- def goto(self, event=None):
- selection = self.result_lst.curselection()
- if selection:
- modname = self.result_lst.get(selection[0]).split()[0]
- self.open(url=self.server.url + modname + '.html')
-
- def collapse(self):
- if not self.expanded: return
- self.result_frm.forget()
- self.result_scr.forget()
- self.result_lst.forget()
- self.bigwidth = self.window.winfo_width()
- self.bigheight = self.window.winfo_height()
- self.window.wm_geometry('%dx%d' % (self.minwidth, self.minheight))
- self.window.wm_minsize(self.minwidth, self.minheight)
- self.expanded = 0
-
- def expand(self):
- if self.expanded: return
- self.result_frm.pack(side='bottom', fill='x')
- self.result_scr.pack(side='right', fill='y')
- self.result_lst.pack(side='top', fill='both', expand=1)
- self.window.wm_geometry('%dx%d' % (self.bigwidth, self.bigheight))
- self.window.wm_minsize(self.minwidth, self.bigminheight)
- self.expanded = 1
-
- def hide(self, event=None):
- self.stop()
- self.collapse()
-
- import tkinter
- try:
- root = tkinter.Tk()
- # Tk will crash if pythonw.exe has an XP .manifest
- # file and the root has is not destroyed explicitly.
- # If the problem is ever fixed in Tk, the explicit
- # destroy can go.
- try:
- gui = GUI(root)
- root.mainloop()
- finally:
- root.destroy()
- except KeyboardInterrupt:
- pass
-
-
# --------------------------------------- enhanced Web browser interface
def _start_server(urlhandler, port):
@@ -2796,15 +2533,12 @@ def cli():
sys.path.insert(0, '.')
try:
- opts, args = getopt.getopt(sys.argv[1:], 'bgk:p:w')
+ opts, args = getopt.getopt(sys.argv[1:], 'bk:p:w')
writing = False
start_server = False
open_browser = False
port = None
for opt, val in opts:
- if opt == '-g':
- gui()
- return
if opt == '-b':
start_server = True
open_browser = True
@@ -2817,8 +2551,8 @@ def cli():
if opt == '-w':
writing = True
- if start_server == True:
- if port == None:
+ if start_server:
+ if port is None:
port = 0
browse(port, open_browser=open_browser)
return
@@ -2865,9 +2599,6 @@ def cli():
to interactively browse documentation. The -p option can be used with
the -b option to explicitly specify the server port.
-{cmd} -g
- Deprecated.
-
{cmd} -w <name> ...
Write out the HTML documentation for a module to a file in the current
directory. If <name> contains a '{sep}', it is treated as a filename; if
diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py
index 08a9e7c..62a54a9 100644
--- a/Lib/pydoc_data/topics.py
+++ b/Lib/pydoc_data/topics.py
@@ -1,16 +1,17 @@
-# Autogenerated by Sphinx on Thu Feb 23 18:37:54 2012
+# -*- coding: utf-8 -*-
+# Autogenerated by Sphinx on Sat Aug 25 12:12:45 2012
topics = {'assert': '\nThe ``assert`` statement\n************************\n\nAssert statements are a convenient way to insert debugging assertions\ninto a program:\n\n assert_stmt ::= "assert" expression ["," expression]\n\nThe simple form, ``assert expression``, is equivalent to\n\n if __debug__:\n if not expression: raise AssertionError\n\nThe extended form, ``assert expression1, expression2``, is equivalent\nto\n\n if __debug__:\n if not expression1: raise AssertionError(expression2)\n\nThese equivalences assume that ``__debug__`` and ``AssertionError``\nrefer to the built-in variables with those names. In the current\nimplementation, the built-in variable ``__debug__`` is ``True`` under\nnormal circumstances, ``False`` when optimization is requested\n(command line option -O). The current code generator emits no code\nfor an assert statement when optimization is requested at compile\ntime. Note that it is unnecessary to include the source code for the\nexpression that failed in the error message; it will be displayed as\npart of the stack trace.\n\nAssignments to ``__debug__`` are illegal. The value for the built-in\nvariable is determined when the interpreter starts.\n',
'assignment': '\nAssignment statements\n*********************\n\nAssignment statements are used to (re)bind names to values and to\nmodify attributes or items of mutable objects:\n\n assignment_stmt ::= (target_list "=")+ (expression_list | yield_expression)\n target_list ::= target ("," target)* [","]\n target ::= identifier\n | "(" target_list ")"\n | "[" target_list "]"\n | attributeref\n | subscription\n | slicing\n | "*" target\n\n(See section *Primaries* for the syntax definitions for the last three\nsymbols.)\n\nAn assignment statement evaluates the expression list (remember that\nthis can be a single expression or a comma-separated list, the latter\nyielding a tuple) and assigns the single resulting object to each of\nthe target lists, from left to right.\n\nAssignment is defined recursively depending on the form of the target\n(list). When a target is part of a mutable object (an attribute\nreference, subscription or slicing), the mutable object must\nultimately perform the assignment and decide about its validity, and\nmay raise an exception if the assignment is unacceptable. The rules\nobserved by various types and the exceptions raised are given with the\ndefinition of the object types (see section *The standard type\nhierarchy*).\n\nAssignment of an object to a target list, optionally enclosed in\nparentheses or square brackets, is recursively defined as follows.\n\n* If the target list is a single target: The object is assigned to\n that target.\n\n* If the target list is a comma-separated list of targets: The object\n must be an iterable with the same number of items as there are\n targets in the target list, and the items are assigned, from left to\n right, to the corresponding targets.\n\n * If the target list contains one target prefixed with an asterisk,\n called a "starred" target: The object must be a sequence with at\n least as many items as there are targets in the target list, minus\n one. 
The first items of the sequence are assigned, from left to\n right, to the targets before the starred target. The final items\n of the sequence are assigned to the targets after the starred\n target. A list of the remaining items in the sequence is then\n assigned to the starred target (the list can be empty).\n\n * Else: The object must be a sequence with the same number of items\n as there are targets in the target list, and the items are\n assigned, from left to right, to the corresponding targets.\n\nAssignment of an object to a single target is recursively defined as\nfollows.\n\n* If the target is an identifier (name):\n\n * If the name does not occur in a ``global`` or ``nonlocal``\n statement in the current code block: the name is bound to the\n object in the current local namespace.\n\n * Otherwise: the name is bound to the object in the global namespace\n or the outer namespace determined by ``nonlocal``, respectively.\n\n The name is rebound if it was already bound. This may cause the\n reference count for the object previously bound to the name to reach\n zero, causing the object to be deallocated and its destructor (if it\n has one) to be called.\n\n* If the target is a target list enclosed in parentheses or in square\n brackets: The object must be an iterable with the same number of\n items as there are targets in the target list, and its items are\n assigned, from left to right, to the corresponding targets.\n\n* If the target is an attribute reference: The primary expression in\n the reference is evaluated. It should yield an object with\n assignable attributes; if this is not the case, ``TypeError`` is\n raised. 
That object is then asked to assign the assigned object to\n the given attribute; if it cannot perform the assignment, it raises\n an exception (usually but not necessarily ``AttributeError``).\n\n Note: If the object is a class instance and the attribute reference\n occurs on both sides of the assignment operator, the RHS expression,\n ``a.x`` can access either an instance attribute or (if no instance\n attribute exists) a class attribute. The LHS target ``a.x`` is\n always set as an instance attribute, creating it if necessary.\n Thus, the two occurrences of ``a.x`` do not necessarily refer to the\n same attribute: if the RHS expression refers to a class attribute,\n the LHS creates a new instance attribute as the target of the\n assignment:\n\n class Cls:\n x = 3 # class variable\n inst = Cls()\n inst.x = inst.x + 1 # writes inst.x as 4 leaving Cls.x as 3\n\n This description does not necessarily apply to descriptor\n attributes, such as properties created with ``property()``.\n\n* If the target is a subscription: The primary expression in the\n reference is evaluated. It should yield either a mutable sequence\n object (such as a list) or a mapping object (such as a dictionary).\n Next, the subscript expression is evaluated.\n\n If the primary is a mutable sequence object (such as a list), the\n subscript must yield an integer. If it is negative, the sequence\'s\n length is added to it. The resulting value must be a nonnegative\n integer less than the sequence\'s length, and the sequence is asked\n to assign the assigned object to its item with that index. If the\n index is out of range, ``IndexError`` is raised (assignment to a\n subscripted sequence cannot add new items to a list).\n\n If the primary is a mapping object (such as a dictionary), the\n subscript must have a type compatible with the mapping\'s key type,\n and the mapping is then asked to create a key/datum pair which maps\n the subscript to the assigned object. 
This can either replace an\n existing key/value pair with the same key value, or insert a new\n key/value pair (if no key with the same value existed).\n\n For user-defined objects, the ``__setitem__()`` method is called\n with appropriate arguments.\n\n* If the target is a slicing: The primary expression in the reference\n is evaluated. It should yield a mutable sequence object (such as a\n list). The assigned object should be a sequence object of the same\n type. Next, the lower and upper bound expressions are evaluated,\n insofar they are present; defaults are zero and the sequence\'s\n length. The bounds should evaluate to integers. If either bound is\n negative, the sequence\'s length is added to it. The resulting\n bounds are clipped to lie between zero and the sequence\'s length,\n inclusive. Finally, the sequence object is asked to replace the\n slice with the items of the assigned sequence. The length of the\n slice may be different from the length of the assigned sequence,\n thus changing the length of the target sequence, if the object\n allows it.\n\n**CPython implementation detail:** In the current implementation, the\nsyntax for targets is taken to be the same as for expressions, and\ninvalid syntax is rejected during the code generation phase, causing\nless detailed error messages.\n\nWARNING: Although the definition of assignment implies that overlaps\nbetween the left-hand side and the right-hand side are \'safe\' (for\nexample ``a, b = b, a`` swaps two variables), overlaps *within* the\ncollection of assigned-to variables are not safe! 
For instance, the\nfollowing program prints ``[0, 2]``:\n\n x = [0, 1]\n i = 0\n i, x[i] = 1, 2\n print(x)\n\nSee also:\n\n **PEP 3132** - Extended Iterable Unpacking\n The specification for the ``*target`` feature.\n\n\nAugmented assignment statements\n===============================\n\nAugmented assignment is the combination, in a single statement, of a\nbinary operation and an assignment statement:\n\n augmented_assignment_stmt ::= augtarget augop (expression_list | yield_expression)\n augtarget ::= identifier | attributeref | subscription | slicing\n augop ::= "+=" | "-=" | "*=" | "/=" | "//=" | "%=" | "**="\n | ">>=" | "<<=" | "&=" | "^=" | "|="\n\n(See section *Primaries* for the syntax definitions for the last three\nsymbols.)\n\nAn augmented assignment evaluates the target (which, unlike normal\nassignment statements, cannot be an unpacking) and the expression\nlist, performs the binary operation specific to the type of assignment\non the two operands, and assigns the result to the original target.\nThe target is only evaluated once.\n\nAn augmented assignment expression like ``x += 1`` can be rewritten as\n``x = x + 1`` to achieve a similar, but not exactly equal effect. In\nthe augmented version, ``x`` is only evaluated once. Also, when\npossible, the actual operation is performed *in-place*, meaning that\nrather than creating a new object and assigning that to the target,\nthe old object is modified instead.\n\nWith the exception of assigning to tuples and multiple targets in a\nsingle statement, the assignment done by augmented assignment\nstatements is handled the same way as normal assignments. Similarly,\nwith the exception of the possible *in-place* behavior, the binary\noperation performed by augmented assignment is the same as the normal\nbinary operations.\n\nFor targets which are attribute references, the same *caveat about\nclass and instance attributes* applies as for regular assignments.\n',
'atom-identifiers': '\nIdentifiers (Names)\n*******************\n\nAn identifier occurring as an atom is a name. See section\n*Identifiers and keywords* for lexical definition and section *Naming\nand binding* for documentation of naming and binding.\n\nWhen the name is bound to an object, evaluation of the atom yields\nthat object. When a name is not bound, an attempt to evaluate it\nraises a ``NameError`` exception.\n\n**Private name mangling:** When an identifier that textually occurs in\na class definition begins with two or more underscore characters and\ndoes not end in two or more underscores, it is considered a *private\nname* of that class. Private names are transformed to a longer form\nbefore code is generated for them. The transformation inserts the\nclass name in front of the name, with leading underscores removed, and\na single underscore inserted in front of the class name. For example,\nthe identifier ``__spam`` occurring in a class named ``Ham`` will be\ntransformed to ``_Ham__spam``. This transformation is independent of\nthe syntactical context in which the identifier is used. If the\ntransformed name is extremely long (longer than 255 characters),\nimplementation defined truncation may happen. If the class name\nconsists only of underscores, no transformation is done.\n',
'atom-literals': "\nLiterals\n********\n\nPython supports string and bytes literals and various numeric\nliterals:\n\n literal ::= stringliteral | bytesliteral\n | integer | floatnumber | imagnumber\n\nEvaluation of a literal yields an object of the given type (string,\nbytes, integer, floating point number, complex number) with the given\nvalue. The value may be approximated in the case of floating point\nand imaginary (complex) literals. See section *Literals* for details.\n\nAll literals correspond to immutable data types, and hence the\nobject's identity is less important than its value. Multiple\nevaluations of literals with the same value (either the same\noccurrence in the program text or a different occurrence) may obtain\nthe same object or a different object with the same value.\n",
- 'attribute-access': '\nCustomizing attribute access\n****************************\n\nThe following methods can be defined to customize the meaning of\nattribute access (use of, assignment to, or deletion of ``x.name``)\nfor class instances.\n\nobject.__getattr__(self, name)\n\n Called when an attribute lookup has not found the attribute in the\n usual places (i.e. it is not an instance attribute nor is it found\n in the class tree for ``self``). ``name`` is the attribute name.\n This method should return the (computed) attribute value or raise\n an ``AttributeError`` exception.\n\n Note that if the attribute is found through the normal mechanism,\n ``__getattr__()`` is not called. (This is an intentional asymmetry\n between ``__getattr__()`` and ``__setattr__()``.) This is done both\n for efficiency reasons and because otherwise ``__getattr__()``\n would have no way to access other attributes of the instance. Note\n that at least for instance variables, you can fake total control by\n not inserting any values in the instance attribute dictionary (but\n instead inserting them in another object). See the\n ``__getattribute__()`` method below for a way to actually get total\n control over attribute access.\n\nobject.__getattribute__(self, name)\n\n Called unconditionally to implement attribute accesses for\n instances of the class. If the class also defines\n ``__getattr__()``, the latter will not be called unless\n ``__getattribute__()`` either calls it explicitly or raises an\n ``AttributeError``. This method should return the (computed)\n attribute value or raise an ``AttributeError`` exception. 
In order\n to avoid infinite recursion in this method, its implementation\n should always call the base class method with the same name to\n access any attributes it needs, for example,\n ``object.__getattribute__(self, name)``.\n\n Note: This method may still be bypassed when looking up special methods\n as the result of implicit invocation via language syntax or\n built-in functions. See *Special method lookup*.\n\nobject.__setattr__(self, name, value)\n\n Called when an attribute assignment is attempted. This is called\n instead of the normal mechanism (i.e. store the value in the\n instance dictionary). *name* is the attribute name, *value* is the\n value to be assigned to it.\n\n If ``__setattr__()`` wants to assign to an instance attribute, it\n should call the base class method with the same name, for example,\n ``object.__setattr__(self, name, value)``.\n\nobject.__delattr__(self, name)\n\n Like ``__setattr__()`` but for attribute deletion instead of\n assignment. This should only be implemented if ``del obj.name`` is\n meaningful for the object.\n\nobject.__dir__(self)\n\n Called when ``dir()`` is called on the object. A list must be\n returned.\n\n\nImplementing Descriptors\n========================\n\nThe following methods only apply when an instance of the class\ncontaining the method (a so-called *descriptor* class) appears in an\n*owner* class (the descriptor must be in either the owner\'s class\ndictionary or in the class dictionary for one of its parents). In the\nexamples below, "the attribute" refers to the attribute whose name is\nthe key of the property in the owner class\' ``__dict__``.\n\nobject.__get__(self, instance, owner)\n\n Called to get the attribute of the owner class (class attribute\n access) or of an instance of that class (instance attribute\n access). *owner* is always the owner class, while *instance* is the\n instance that the attribute was accessed through, or ``None`` when\n the attribute is accessed through the *owner*. 
This method should\n return the (computed) attribute value or raise an\n ``AttributeError`` exception.\n\nobject.__set__(self, instance, value)\n\n Called to set the attribute on an instance *instance* of the owner\n class to a new value, *value*.\n\nobject.__delete__(self, instance)\n\n Called to delete the attribute on an instance *instance* of the\n owner class.\n\n\nInvoking Descriptors\n====================\n\nIn general, a descriptor is an object attribute with "binding\nbehavior", one whose attribute access has been overridden by methods\nin the descriptor protocol: ``__get__()``, ``__set__()``, and\n``__delete__()``. If any of those methods are defined for an object,\nit is said to be a descriptor.\n\nThe default behavior for attribute access is to get, set, or delete\nthe attribute from an object\'s dictionary. For instance, ``a.x`` has a\nlookup chain starting with ``a.__dict__[\'x\']``, then\n``type(a).__dict__[\'x\']``, and continuing through the base classes of\n``type(a)`` excluding metaclasses.\n\nHowever, if the looked-up value is an object defining one of the\ndescriptor methods, then Python may override the default behavior and\ninvoke the descriptor method instead. 
Where this occurs in the\nprecedence chain depends on which descriptor methods were defined and\nhow they were called.\n\nThe starting point for descriptor invocation is a binding, ``a.x``.\nHow the arguments are assembled depends on ``a``:\n\nDirect Call\n The simplest and least common call is when user code directly\n invokes a descriptor method: ``x.__get__(a)``.\n\nInstance Binding\n If binding to an object instance, ``a.x`` is transformed into the\n call: ``type(a).__dict__[\'x\'].__get__(a, type(a))``.\n\nClass Binding\n If binding to a class, ``A.x`` is transformed into the call:\n ``A.__dict__[\'x\'].__get__(None, A)``.\n\nSuper Binding\n If ``a`` is an instance of ``super``, then the binding ``super(B,\n obj).m()`` searches ``obj.__class__.__mro__`` for the base class\n ``A`` immediately preceding ``B`` and then invokes the descriptor\n with the call: ``A.__dict__[\'m\'].__get__(obj, obj.__class__)``.\n\nFor instance bindings, the precedence of descriptor invocation depends\non the which descriptor methods are defined. A descriptor can define\nany combination of ``__get__()``, ``__set__()`` and ``__delete__()``.\nIf it does not define ``__get__()``, then accessing the attribute will\nreturn the descriptor object itself unless there is a value in the\nobject\'s instance dictionary. If the descriptor defines ``__set__()``\nand/or ``__delete__()``, it is a data descriptor; if it defines\nneither, it is a non-data descriptor. Normally, data descriptors\ndefine both ``__get__()`` and ``__set__()``, while non-data\ndescriptors have just the ``__get__()`` method. Data descriptors with\n``__set__()`` and ``__get__()`` defined always override a redefinition\nin an instance dictionary. In contrast, non-data descriptors can be\noverridden by instances.\n\nPython methods (including ``staticmethod()`` and ``classmethod()``)\nare implemented as non-data descriptors. Accordingly, instances can\nredefine and override methods. 
This allows individual instances to\nacquire behaviors that differ from other instances of the same class.\n\nThe ``property()`` function is implemented as a data descriptor.\nAccordingly, instances cannot override the behavior of a property.\n\n\n__slots__\n=========\n\nBy default, instances of classes have a dictionary for attribute\nstorage. This wastes space for objects having very few instance\nvariables. The space consumption can become acute when creating large\nnumbers of instances.\n\nThe default can be overridden by defining *__slots__* in a class\ndefinition. The *__slots__* declaration takes a sequence of instance\nvariables and reserves just enough space in each instance to hold a\nvalue for each variable. Space is saved because *__dict__* is not\ncreated for each instance.\n\nobject.__slots__\n\n This class variable can be assigned a string, iterable, or sequence\n of strings with variable names used by instances. If defined in a\n class, *__slots__* reserves space for the declared variables and\n prevents the automatic creation of *__dict__* and *__weakref__* for\n each instance.\n\n\nNotes on using *__slots__*\n--------------------------\n\n* When inheriting from a class without *__slots__*, the *__dict__*\n attribute of that class will always be accessible, so a *__slots__*\n definition in the subclass is meaningless.\n\n* Without a *__dict__* variable, instances cannot be assigned new\n variables not listed in the *__slots__* definition. Attempts to\n assign to an unlisted variable name raises ``AttributeError``. If\n dynamic assignment of new variables is desired, then add\n ``\'__dict__\'`` to the sequence of strings in the *__slots__*\n declaration.\n\n* Without a *__weakref__* variable for each instance, classes defining\n *__slots__* do not support weak references to its instances. 
If weak\n reference support is needed, then add ``\'__weakref__\'`` to the\n sequence of strings in the *__slots__* declaration.\n\n* *__slots__* are implemented at the class level by creating\n descriptors (*Implementing Descriptors*) for each variable name. As\n a result, class attributes cannot be used to set default values for\n instance variables defined by *__slots__*; otherwise, the class\n attribute would overwrite the descriptor assignment.\n\n* The action of a *__slots__* declaration is limited to the class\n where it is defined. As a result, subclasses will have a *__dict__*\n unless they also define *__slots__* (which must only contain names\n of any *additional* slots).\n\n* If a class defines a slot also defined in a base class, the instance\n variable defined by the base class slot is inaccessible (except by\n retrieving its descriptor directly from the base class). This\n renders the meaning of the program undefined. In the future, a\n check may be added to prevent this.\n\n* Nonempty *__slots__* does not work for classes derived from\n "variable-length" built-in types such as ``int``, ``str`` and\n ``tuple``.\n\n* Any non-string iterable may be assigned to *__slots__*. Mappings may\n also be used; however, in the future, special meaning may be\n assigned to the values corresponding to each key.\n\n* *__class__* assignment works only if both classes have the same\n *__slots__*.\n',
+ 'attribute-access': '\nCustomizing attribute access\n****************************\n\nThe following methods can be defined to customize the meaning of\nattribute access (use of, assignment to, or deletion of ``x.name``)\nfor class instances.\n\nobject.__getattr__(self, name)\n\n Called when an attribute lookup has not found the attribute in the\n usual places (i.e. it is not an instance attribute nor is it found\n in the class tree for ``self``). ``name`` is the attribute name.\n This method should return the (computed) attribute value or raise\n an ``AttributeError`` exception.\n\n Note that if the attribute is found through the normal mechanism,\n ``__getattr__()`` is not called. (This is an intentional asymmetry\n between ``__getattr__()`` and ``__setattr__()``.) This is done both\n for efficiency reasons and because otherwise ``__getattr__()``\n would have no way to access other attributes of the instance. Note\n that at least for instance variables, you can fake total control by\n not inserting any values in the instance attribute dictionary (but\n instead inserting them in another object). See the\n ``__getattribute__()`` method below for a way to actually get total\n control over attribute access.\n\nobject.__getattribute__(self, name)\n\n Called unconditionally to implement attribute accesses for\n instances of the class. If the class also defines\n ``__getattr__()``, the latter will not be called unless\n ``__getattribute__()`` either calls it explicitly or raises an\n ``AttributeError``. This method should return the (computed)\n attribute value or raise an ``AttributeError`` exception. 
In order\n to avoid infinite recursion in this method, its implementation\n should always call the base class method with the same name to\n access any attributes it needs, for example,\n ``object.__getattribute__(self, name)``.\n\n Note: This method may still be bypassed when looking up special methods\n as the result of implicit invocation via language syntax or\n built-in functions. See *Special method lookup*.\n\nobject.__setattr__(self, name, value)\n\n Called when an attribute assignment is attempted. This is called\n instead of the normal mechanism (i.e. store the value in the\n instance dictionary). *name* is the attribute name, *value* is the\n value to be assigned to it.\n\n If ``__setattr__()`` wants to assign to an instance attribute, it\n should call the base class method with the same name, for example,\n ``object.__setattr__(self, name, value)``.\n\nobject.__delattr__(self, name)\n\n Like ``__setattr__()`` but for attribute deletion instead of\n assignment. This should only be implemented if ``del obj.name`` is\n meaningful for the object.\n\nobject.__dir__(self)\n\n Called when ``dir()`` is called on the object. A sequence must be\n returned. ``dir()`` converts the returned sequence to a list and\n sorts it.\n\n\nImplementing Descriptors\n========================\n\nThe following methods only apply when an instance of the class\ncontaining the method (a so-called *descriptor* class) appears in an\n*owner* class (the descriptor must be in either the owner\'s class\ndictionary or in the class dictionary for one of its parents). In the\nexamples below, "the attribute" refers to the attribute whose name is\nthe key of the property in the owner class\' ``__dict__``.\n\nobject.__get__(self, instance, owner)\n\n Called to get the attribute of the owner class (class attribute\n access) or of an instance of that class (instance attribute\n access). 
*owner* is always the owner class, while *instance* is the\n instance that the attribute was accessed through, or ``None`` when\n the attribute is accessed through the *owner*. This method should\n return the (computed) attribute value or raise an\n ``AttributeError`` exception.\n\nobject.__set__(self, instance, value)\n\n Called to set the attribute on an instance *instance* of the owner\n class to a new value, *value*.\n\nobject.__delete__(self, instance)\n\n Called to delete the attribute on an instance *instance* of the\n owner class.\n\n\nInvoking Descriptors\n====================\n\nIn general, a descriptor is an object attribute with "binding\nbehavior", one whose attribute access has been overridden by methods\nin the descriptor protocol: ``__get__()``, ``__set__()``, and\n``__delete__()``. If any of those methods are defined for an object,\nit is said to be a descriptor.\n\nThe default behavior for attribute access is to get, set, or delete\nthe attribute from an object\'s dictionary. For instance, ``a.x`` has a\nlookup chain starting with ``a.__dict__[\'x\']``, then\n``type(a).__dict__[\'x\']``, and continuing through the base classes of\n``type(a)`` excluding metaclasses.\n\nHowever, if the looked-up value is an object defining one of the\ndescriptor methods, then Python may override the default behavior and\ninvoke the descriptor method instead. 
Where this occurs in the\nprecedence chain depends on which descriptor methods were defined and\nhow they were called.\n\nThe starting point for descriptor invocation is a binding, ``a.x``.\nHow the arguments are assembled depends on ``a``:\n\nDirect Call\n The simplest and least common call is when user code directly\n invokes a descriptor method: ``x.__get__(a)``.\n\nInstance Binding\n If binding to an object instance, ``a.x`` is transformed into the\n call: ``type(a).__dict__[\'x\'].__get__(a, type(a))``.\n\nClass Binding\n If binding to a class, ``A.x`` is transformed into the call:\n ``A.__dict__[\'x\'].__get__(None, A)``.\n\nSuper Binding\n If ``a`` is an instance of ``super``, then the binding ``super(B,\n obj).m()`` searches ``obj.__class__.__mro__`` for the base class\n ``A`` immediately preceding ``B`` and then invokes the descriptor\n with the call: ``A.__dict__[\'m\'].__get__(obj, obj.__class__)``.\n\nFor instance bindings, the precedence of descriptor invocation depends\non the which descriptor methods are defined. A descriptor can define\nany combination of ``__get__()``, ``__set__()`` and ``__delete__()``.\nIf it does not define ``__get__()``, then accessing the attribute will\nreturn the descriptor object itself unless there is a value in the\nobject\'s instance dictionary. If the descriptor defines ``__set__()``\nand/or ``__delete__()``, it is a data descriptor; if it defines\nneither, it is a non-data descriptor. Normally, data descriptors\ndefine both ``__get__()`` and ``__set__()``, while non-data\ndescriptors have just the ``__get__()`` method. Data descriptors with\n``__set__()`` and ``__get__()`` defined always override a redefinition\nin an instance dictionary. In contrast, non-data descriptors can be\noverridden by instances.\n\nPython methods (including ``staticmethod()`` and ``classmethod()``)\nare implemented as non-data descriptors. Accordingly, instances can\nredefine and override methods. 
This allows individual instances to\nacquire behaviors that differ from other instances of the same class.\n\nThe ``property()`` function is implemented as a data descriptor.\nAccordingly, instances cannot override the behavior of a property.\n\n\n__slots__\n=========\n\nBy default, instances of classes have a dictionary for attribute\nstorage. This wastes space for objects having very few instance\nvariables. The space consumption can become acute when creating large\nnumbers of instances.\n\nThe default can be overridden by defining *__slots__* in a class\ndefinition. The *__slots__* declaration takes a sequence of instance\nvariables and reserves just enough space in each instance to hold a\nvalue for each variable. Space is saved because *__dict__* is not\ncreated for each instance.\n\nobject.__slots__\n\n This class variable can be assigned a string, iterable, or sequence\n of strings with variable names used by instances. If defined in a\n class, *__slots__* reserves space for the declared variables and\n prevents the automatic creation of *__dict__* and *__weakref__* for\n each instance.\n\n\nNotes on using *__slots__*\n--------------------------\n\n* When inheriting from a class without *__slots__*, the *__dict__*\n attribute of that class will always be accessible, so a *__slots__*\n definition in the subclass is meaningless.\n\n* Without a *__dict__* variable, instances cannot be assigned new\n variables not listed in the *__slots__* definition. Attempts to\n assign to an unlisted variable name raises ``AttributeError``. If\n dynamic assignment of new variables is desired, then add\n ``\'__dict__\'`` to the sequence of strings in the *__slots__*\n declaration.\n\n* Without a *__weakref__* variable for each instance, classes defining\n *__slots__* do not support weak references to its instances. 
If weak\n reference support is needed, then add ``\'__weakref__\'`` to the\n sequence of strings in the *__slots__* declaration.\n\n* *__slots__* are implemented at the class level by creating\n descriptors (*Implementing Descriptors*) for each variable name. As\n a result, class attributes cannot be used to set default values for\n instance variables defined by *__slots__*; otherwise, the class\n attribute would overwrite the descriptor assignment.\n\n* The action of a *__slots__* declaration is limited to the class\n where it is defined. As a result, subclasses will have a *__dict__*\n unless they also define *__slots__* (which must only contain names\n of any *additional* slots).\n\n* If a class defines a slot also defined in a base class, the instance\n variable defined by the base class slot is inaccessible (except by\n retrieving its descriptor directly from the base class). This\n renders the meaning of the program undefined. In the future, a\n check may be added to prevent this.\n\n* Nonempty *__slots__* does not work for classes derived from\n "variable-length" built-in types such as ``int``, ``str`` and\n ``tuple``.\n\n* Any non-string iterable may be assigned to *__slots__*. Mappings may\n also be used; however, in the future, special meaning may be\n assigned to the values corresponding to each key.\n\n* *__class__* assignment works only if both classes have the same\n *__slots__*.\n',
'attribute-references': '\nAttribute references\n********************\n\nAn attribute reference is a primary followed by a period and a name:\n\n attributeref ::= primary "." identifier\n\nThe primary must evaluate to an object of a type that supports\nattribute references, which most objects do. This object is then\nasked to produce the attribute whose name is the identifier (which can\nbe customized by overriding the ``__getattr__()`` method). If this\nattribute is not available, the exception ``AttributeError`` is\nraised. Otherwise, the type and value of the object produced is\ndetermined by the object. Multiple evaluations of the same attribute\nreference may yield different objects.\n',
'augassign': '\nAugmented assignment statements\n*******************************\n\nAugmented assignment is the combination, in a single statement, of a\nbinary operation and an assignment statement:\n\n augmented_assignment_stmt ::= augtarget augop (expression_list | yield_expression)\n augtarget ::= identifier | attributeref | subscription | slicing\n augop ::= "+=" | "-=" | "*=" | "/=" | "//=" | "%=" | "**="\n | ">>=" | "<<=" | "&=" | "^=" | "|="\n\n(See section *Primaries* for the syntax definitions for the last three\nsymbols.)\n\nAn augmented assignment evaluates the target (which, unlike normal\nassignment statements, cannot be an unpacking) and the expression\nlist, performs the binary operation specific to the type of assignment\non the two operands, and assigns the result to the original target.\nThe target is only evaluated once.\n\nAn augmented assignment expression like ``x += 1`` can be rewritten as\n``x = x + 1`` to achieve a similar, but not exactly equal effect. In\nthe augmented version, ``x`` is only evaluated once. Also, when\npossible, the actual operation is performed *in-place*, meaning that\nrather than creating a new object and assigning that to the target,\nthe old object is modified instead.\n\nWith the exception of assigning to tuples and multiple targets in a\nsingle statement, the assignment done by augmented assignment\nstatements is handled the same way as normal assignments. Similarly,\nwith the exception of the possible *in-place* behavior, the binary\noperation performed by augmented assignment is the same as the normal\nbinary operations.\n\nFor targets which are attribute references, the same *caveat about\nclass and instance attributes* applies as for regular assignments.\n',
- 'binary': '\nBinary arithmetic operations\n****************************\n\nThe binary arithmetic operations have the conventional priority\nlevels. Note that some of these operations also apply to certain non-\nnumeric types. Apart from the power operator, there are only two\nlevels, one for multiplicative operators and one for additive\noperators:\n\n m_expr ::= u_expr | m_expr "*" u_expr | m_expr "//" u_expr | m_expr "/" u_expr\n | m_expr "%" u_expr\n a_expr ::= m_expr | a_expr "+" m_expr | a_expr "-" m_expr\n\nThe ``*`` (multiplication) operator yields the product of its\narguments. The arguments must either both be numbers, or one argument\nmust be an integer and the other must be a sequence. In the former\ncase, the numbers are converted to a common type and then multiplied\ntogether. In the latter case, sequence repetition is performed; a\nnegative repetition factor yields an empty sequence.\n\nThe ``/`` (division) and ``//`` (floor division) operators yield the\nquotient of their arguments. The numeric arguments are first\nconverted to a common type. Integer division yields a float, while\nfloor division of integers results in an integer; the result is that\nof mathematical division with the \'floor\' function applied to the\nresult. Division by zero raises the ``ZeroDivisionError`` exception.\n\nThe ``%`` (modulo) operator yields the remainder from the division of\nthe first argument by the second. The numeric arguments are first\nconverted to a common type. A zero right argument raises the\n``ZeroDivisionError`` exception. The arguments may be floating point\nnumbers, e.g., ``3.14%0.7`` equals ``0.34`` (since ``3.14`` equals\n``4*0.7 + 0.34``.) The modulo operator always yields a result with\nthe same sign as its second operand (or zero); the absolute value of\nthe result is strictly smaller than the absolute value of the second\noperand [1].\n\nThe floor division and modulo operators are connected by the following\nidentity: ``x == (x//y)*y + (x%y)``. 
Floor division and modulo are\nalso connected with the built-in function ``divmod()``: ``divmod(x, y)\n== (x//y, x%y)``. [2].\n\nIn addition to performing the modulo operation on numbers, the ``%``\noperator is also overloaded by string objects to perform old-style\nstring formatting (also known as interpolation). The syntax for\nstring formatting is described in the Python Library Reference,\nsection *Old String Formatting Operations*.\n\nThe floor division operator, the modulo operator, and the ``divmod()``\nfunction are not defined for complex numbers. Instead, convert to a\nfloating point number using the ``abs()`` function if appropriate.\n\nThe ``+`` (addition) operator yields the sum of its arguments. The\narguments must either both be numbers or both sequences of the same\ntype. In the former case, the numbers are converted to a common type\nand then added together. In the latter case, the sequences are\nconcatenated.\n\nThe ``-`` (subtraction) operator yields the difference of its\narguments. The numeric arguments are first converted to a common\ntype.\n',
+ 'binary': '\nBinary arithmetic operations\n****************************\n\nThe binary arithmetic operations have the conventional priority\nlevels. Note that some of these operations also apply to certain non-\nnumeric types. Apart from the power operator, there are only two\nlevels, one for multiplicative operators and one for additive\noperators:\n\n m_expr ::= u_expr | m_expr "*" u_expr | m_expr "//" u_expr | m_expr "/" u_expr\n | m_expr "%" u_expr\n a_expr ::= m_expr | a_expr "+" m_expr | a_expr "-" m_expr\n\nThe ``*`` (multiplication) operator yields the product of its\narguments. The arguments must either both be numbers, or one argument\nmust be an integer and the other must be a sequence. In the former\ncase, the numbers are converted to a common type and then multiplied\ntogether. In the latter case, sequence repetition is performed; a\nnegative repetition factor yields an empty sequence.\n\nThe ``/`` (division) and ``//`` (floor division) operators yield the\nquotient of their arguments. The numeric arguments are first\nconverted to a common type. Integer division yields a float, while\nfloor division of integers results in an integer; the result is that\nof mathematical division with the \'floor\' function applied to the\nresult. Division by zero raises the ``ZeroDivisionError`` exception.\n\nThe ``%`` (modulo) operator yields the remainder from the division of\nthe first argument by the second. The numeric arguments are first\nconverted to a common type. A zero right argument raises the\n``ZeroDivisionError`` exception. The arguments may be floating point\nnumbers, e.g., ``3.14%0.7`` equals ``0.34`` (since ``3.14`` equals\n``4*0.7 + 0.34``.) The modulo operator always yields a result with\nthe same sign as its second operand (or zero); the absolute value of\nthe result is strictly smaller than the absolute value of the second\noperand [1].\n\nThe floor division and modulo operators are connected by the following\nidentity: ``x == (x//y)*y + (x%y)``. 
Floor division and modulo are\nalso connected with the built-in function ``divmod()``: ``divmod(x, y)\n== (x//y, x%y)``. [2].\n\nIn addition to performing the modulo operation on numbers, the ``%``\noperator is also overloaded by string objects to perform old-style\nstring formatting (also known as interpolation). The syntax for\nstring formatting is described in the Python Library Reference,\nsection *printf-style String Formatting*.\n\nThe floor division operator, the modulo operator, and the ``divmod()``\nfunction are not defined for complex numbers. Instead, convert to a\nfloating point number using the ``abs()`` function if appropriate.\n\nThe ``+`` (addition) operator yields the sum of its arguments. The\narguments must either both be numbers or both sequences of the same\ntype. In the former case, the numbers are converted to a common type\nand then added together. In the latter case, the sequences are\nconcatenated.\n\nThe ``-`` (subtraction) operator yields the difference of its\narguments. The numeric arguments are first converted to a common\ntype.\n',
'bitwise': '\nBinary bitwise operations\n*************************\n\nEach of the three bitwise operations has a different priority level:\n\n and_expr ::= shift_expr | and_expr "&" shift_expr\n xor_expr ::= and_expr | xor_expr "^" and_expr\n or_expr ::= xor_expr | or_expr "|" xor_expr\n\nThe ``&`` operator yields the bitwise AND of its arguments, which must\nbe integers.\n\nThe ``^`` operator yields the bitwise XOR (exclusive OR) of its\narguments, which must be integers.\n\nThe ``|`` operator yields the bitwise (inclusive) OR of its arguments,\nwhich must be integers.\n',
'bltin-code-objects': '\nCode Objects\n************\n\nCode objects are used by the implementation to represent "pseudo-\ncompiled" executable Python code such as a function body. They differ\nfrom function objects because they don\'t contain a reference to their\nglobal execution environment. Code objects are returned by the built-\nin ``compile()`` function and can be extracted from function objects\nthrough their ``__code__`` attribute. See also the ``code`` module.\n\nA code object can be executed or evaluated by passing it (instead of a\nsource string) to the ``exec()`` or ``eval()`` built-in functions.\n\nSee *The standard type hierarchy* for more information.\n',
- 'bltin-ellipsis-object': '\nThe Ellipsis Object\n*******************\n\nThis object is commonly used by slicing (see *Slicings*). It supports\nno special operations. There is exactly one ellipsis object, named\n``Ellipsis`` (a built-in name).\n\nIt is written as ``Ellipsis`` or ``...``.\n',
- 'bltin-null-object': "\nThe Null Object\n***************\n\nThis object is returned by functions that don't explicitly return a\nvalue. It supports no special operations. There is exactly one null\nobject, named ``None`` (a built-in name).\n\nIt is written as ``None``.\n",
+ 'bltin-ellipsis-object': '\nThe Ellipsis Object\n*******************\n\nThis object is commonly used by slicing (see *Slicings*). It supports\nno special operations. There is exactly one ellipsis object, named\n``Ellipsis`` (a built-in name). ``type(Ellipsis)()`` produces the\n``Ellipsis`` singleton.\n\nIt is written as ``Ellipsis`` or ``...``.\n',
+ 'bltin-null-object': "\nThe Null Object\n***************\n\nThis object is returned by functions that don't explicitly return a\nvalue. It supports no special operations. There is exactly one null\nobject, named ``None`` (a built-in name). ``type(None)()`` produces\nthe same singleton.\n\nIt is written as ``None``.\n",
'bltin-type-objects': "\nType Objects\n************\n\nType objects represent the various object types. An object's type is\naccessed by the built-in function ``type()``. There are no special\noperations on types. The standard module ``types`` defines names for\nall standard built-in types.\n\nTypes are written like this: ``<class 'int'>``.\n",
'booleans': '\nBoolean operations\n******************\n\n or_test ::= and_test | or_test "or" and_test\n and_test ::= not_test | and_test "and" not_test\n not_test ::= comparison | "not" not_test\n\nIn the context of Boolean operations, and also when expressions are\nused by control flow statements, the following values are interpreted\nas false: ``False``, ``None``, numeric zero of all types, and empty\nstrings and containers (including strings, tuples, lists,\ndictionaries, sets and frozensets). All other values are interpreted\nas true. User-defined objects can customize their truth value by\nproviding a ``__bool__()`` method.\n\nThe operator ``not`` yields ``True`` if its argument is false,\n``False`` otherwise.\n\nThe expression ``x and y`` first evaluates *x*; if *x* is false, its\nvalue is returned; otherwise, *y* is evaluated and the resulting value\nis returned.\n\nThe expression ``x or y`` first evaluates *x*; if *x* is true, its\nvalue is returned; otherwise, *y* is evaluated and the resulting value\nis returned.\n\n(Note that neither ``and`` nor ``or`` restrict the value and type they\nreturn to ``False`` and ``True``, but rather return the last evaluated\nargument. This is sometimes useful, e.g., if ``s`` is a string that\nshould be replaced by a default value if it is empty, the expression\n``s or \'foo\'`` yields the desired value. Because ``not`` has to\ninvent a value anyway, it does not bother to return a value of the\nsame type as its argument, so e.g., ``not \'foo\'`` yields ``False``,\nnot ``\'\'``.)\n',
'break': '\nThe ``break`` statement\n***********************\n\n break_stmt ::= "break"\n\n``break`` may only occur syntactically nested in a ``for`` or\n``while`` loop, but not nested in a function or class definition\nwithin that loop.\n\nIt terminates the nearest enclosing loop, skipping the optional\n``else`` clause if the loop has one.\n\nIf a ``for`` loop is terminated by ``break``, the loop control target\nkeeps its current value.\n\nWhen ``break`` passes control out of a ``try`` statement with a\n``finally`` clause, that ``finally`` clause is executed before really\nleaving the loop.\n',
@@ -18,13 +19,13 @@ topics = {'assert': '\nThe ``assert`` statement\n************************\n\nAss
'calls': '\nCalls\n*****\n\nA call calls a callable object (e.g., a function) with a possibly\nempty series of arguments:\n\n call ::= primary "(" [argument_list [","] | comprehension] ")"\n argument_list ::= positional_arguments ["," keyword_arguments]\n ["," "*" expression] ["," keyword_arguments]\n ["," "**" expression]\n | keyword_arguments ["," "*" expression]\n ["," keyword_arguments] ["," "**" expression]\n | "*" expression ["," keyword_arguments] ["," "**" expression]\n | "**" expression\n positional_arguments ::= expression ("," expression)*\n keyword_arguments ::= keyword_item ("," keyword_item)*\n keyword_item ::= identifier "=" expression\n\nA trailing comma may be present after the positional and keyword\narguments but does not affect the semantics.\n\nThe primary must evaluate to a callable object (user-defined\nfunctions, built-in functions, methods of built-in objects, class\nobjects, methods of class instances, and all objects having a\n``__call__()`` method are callable). All argument expressions are\nevaluated before the call is attempted. Please refer to section\n*Function definitions* for the syntax of formal parameter lists.\n\nIf keyword arguments are present, they are first converted to\npositional arguments, as follows. First, a list of unfilled slots is\ncreated for the formal parameters. If there are N positional\narguments, they are placed in the first N slots. Next, for each\nkeyword argument, the identifier is used to determine the\ncorresponding slot (if the identifier is the same as the first formal\nparameter name, the first slot is used, and so on). If the slot is\nalready filled, a ``TypeError`` exception is raised. Otherwise, the\nvalue of the argument is placed in the slot, filling it (even if the\nexpression is ``None``, it fills the slot). When all arguments have\nbeen processed, the slots that are still unfilled are filled with the\ncorresponding default value from the function definition. 
(Default\nvalues are calculated, once, when the function is defined; thus, a\nmutable object such as a list or dictionary used as default value will\nbe shared by all calls that don\'t specify an argument value for the\ncorresponding slot; this should usually be avoided.) If there are any\nunfilled slots for which no default value is specified, a\n``TypeError`` exception is raised. Otherwise, the list of filled\nslots is used as the argument list for the call.\n\n**CPython implementation detail:** An implementation may provide\nbuilt-in functions whose positional parameters do not have names, even\nif they are \'named\' for the purpose of documentation, and which\ntherefore cannot be supplied by keyword. In CPython, this is the case\nfor functions implemented in C that use ``PyArg_ParseTuple()`` to\nparse their arguments.\n\nIf there are more positional arguments than there are formal parameter\nslots, a ``TypeError`` exception is raised, unless a formal parameter\nusing the syntax ``*identifier`` is present; in this case, that formal\nparameter receives a tuple containing the excess positional arguments\n(or an empty tuple if there were no excess positional arguments).\n\nIf any keyword argument does not correspond to a formal parameter\nname, a ``TypeError`` exception is raised, unless a formal parameter\nusing the syntax ``**identifier`` is present; in this case, that\nformal parameter receives a dictionary containing the excess keyword\narguments (using the keywords as keys and the argument values as\ncorresponding values), or a (new) empty dictionary if there were no\nexcess keyword arguments.\n\nIf the syntax ``*expression`` appears in the function call,\n``expression`` must evaluate to an iterable. 
Elements from this\niterable are treated as if they were additional positional arguments;\nif there are positional arguments *x1*, ..., *xN*, and ``expression``\nevaluates to a sequence *y1*, ..., *yM*, this is equivalent to a call\nwith M+N positional arguments *x1*, ..., *xN*, *y1*, ..., *yM*.\n\nA consequence of this is that although the ``*expression`` syntax may\nappear *after* some keyword arguments, it is processed *before* the\nkeyword arguments (and the ``**expression`` argument, if any -- see\nbelow). So:\n\n >>> def f(a, b):\n ... print(a, b)\n ...\n >>> f(b=1, *(2,))\n 2 1\n >>> f(a=1, *(2,))\n Traceback (most recent call last):\n File "<stdin>", line 1, in ?\n TypeError: f() got multiple values for keyword argument \'a\'\n >>> f(1, *(2,))\n 1 2\n\nIt is unusual for both keyword arguments and the ``*expression``\nsyntax to be used in the same call, so in practice this confusion does\nnot arise.\n\nIf the syntax ``**expression`` appears in the function call,\n``expression`` must evaluate to a mapping, the contents of which are\ntreated as additional keyword arguments. In the case of a keyword\nappearing in both ``expression`` and as an explicit keyword argument,\na ``TypeError`` exception is raised.\n\nFormal parameters using the syntax ``*identifier`` or ``**identifier``\ncannot be used as positional argument slots or as keyword argument\nnames.\n\nA call always returns some value, possibly ``None``, unless it raises\nan exception. How this value is computed depends on the type of the\ncallable object.\n\nIf it is---\n\na user-defined function:\n The code block for the function is executed, passing it the\n argument list. The first thing the code block will do is bind the\n formal parameters to the arguments; this is described in section\n *Function definitions*. 
When the code block executes a ``return``\n statement, this specifies the return value of the function call.\n\na built-in function or method:\n The result is up to the interpreter; see *Built-in Functions* for\n the descriptions of built-in functions and methods.\n\na class object:\n A new instance of that class is returned.\n\na class instance method:\n The corresponding user-defined function is called, with an argument\n list that is one longer than the argument list of the call: the\n instance becomes the first argument.\n\na class instance:\n The class must define a ``__call__()`` method; the effect is then\n the same as if that method was called.\n',
'class': '\nClass definitions\n*****************\n\nA class definition defines a class object (see section *The standard\ntype hierarchy*):\n\n classdef ::= [decorators] "class" classname [inheritance] ":" suite\n inheritance ::= "(" [parameter_list] ")"\n classname ::= identifier\n\nA class definition is an executable statement. The inheritance list\nusually gives a list of base classes (see *Customizing class creation*\nfor more advanced uses), so each item in the list should evaluate to a\nclass object which allows subclassing. Classes without an inheritance\nlist inherit, by default, from the base class ``object``; hence,\n\n class Foo:\n pass\n\nis equivalent to\n\n class Foo(object):\n pass\n\nThe class\'s suite is then executed in a new execution frame (see\n*Naming and binding*), using a newly created local namespace and the\noriginal global namespace. (Usually, the suite contains mostly\nfunction definitions.) When the class\'s suite finishes execution, its\nexecution frame is discarded but its local namespace is saved. [4] A\nclass object is then created using the inheritance list for the base\nclasses and the saved local namespace for the attribute dictionary.\nThe class name is bound to this class object in the original local\nnamespace.\n\nClass creation can be customized heavily using *metaclasses*.\n\nClasses can also be decorated: just like when decorating functions,\n\n @f1(arg)\n @f2\n class Foo: pass\n\nis equivalent to\n\n class Foo: pass\n Foo = f1(arg)(f2(Foo))\n\nThe evaluation rules for the decorator expressions are the same as for\nfunction decorators. The result must be a class object, which is then\nbound to the class name.\n\n**Programmer\'s note:** Variables defined in the class definition are\nclass attributes; they are shared by instances. Instance attributes\ncan be set in a method with ``self.name = value``. 
Both class and\ninstance attributes are accessible through the notation\n"``self.name``", and an instance attribute hides a class attribute\nwith the same name when accessed in this way. Class attributes can be\nused as defaults for instance attributes, but using mutable values\nthere can lead to unexpected results. *Descriptors* can be used to\ncreate instance variables with different implementation details.\n\nSee also:\n\n **PEP 3115** - Metaclasses in Python 3 **PEP 3129** - Class\n Decorators\n\n-[ Footnotes ]-\n\n[1] The exception is propagated to the invocation stack unless there\n is a ``finally`` clause which happens to raise another exception.\n That new exception causes the old one to be lost.\n\n[2] Currently, control "flows off the end" except in the case of an\n exception or the execution of a ``return``, ``continue``, or\n ``break`` statement.\n\n[3] A string literal appearing as the first statement in the function\n body is transformed into the function\'s ``__doc__`` attribute and\n therefore the function\'s *docstring*.\n\n[4] A string literal appearing as the first statement in the class\n body is transformed into the namespace\'s ``__doc__`` item and\n therefore the class\'s *docstring*.\n',
'comparisons': '\nComparisons\n***********\n\nUnlike C, all comparison operations in Python have the same priority,\nwhich is lower than that of any arithmetic, shifting or bitwise\noperation. Also unlike C, expressions like ``a < b < c`` have the\ninterpretation that is conventional in mathematics:\n\n comparison ::= or_expr ( comp_operator or_expr )*\n comp_operator ::= "<" | ">" | "==" | ">=" | "<=" | "!="\n | "is" ["not"] | ["not"] "in"\n\nComparisons yield boolean values: ``True`` or ``False``.\n\nComparisons can be chained arbitrarily, e.g., ``x < y <= z`` is\nequivalent to ``x < y and y <= z``, except that ``y`` is evaluated\nonly once (but in both cases ``z`` is not evaluated at all when ``x <\ny`` is found to be false).\n\nFormally, if *a*, *b*, *c*, ..., *y*, *z* are expressions and *op1*,\n*op2*, ..., *opN* are comparison operators, then ``a op1 b op2 c ... y\nopN z`` is equivalent to ``a op1 b and b op2 c and ... y opN z``,\nexcept that each expression is evaluated at most once.\n\nNote that ``a op1 b op2 c`` doesn\'t imply any kind of comparison\nbetween *a* and *c*, so that, e.g., ``x < y > z`` is perfectly legal\n(though perhaps not pretty).\n\nThe operators ``<``, ``>``, ``==``, ``>=``, ``<=``, and ``!=`` compare\nthe values of two objects. The objects need not have the same type.\nIf both are numbers, they are converted to a common type. Otherwise,\nthe ``==`` and ``!=`` operators *always* consider objects of different\ntypes to be unequal, while the ``<``, ``>``, ``>=`` and ``<=``\noperators raise a ``TypeError`` when comparing objects of different\ntypes that do not implement these operators for the given pair of\ntypes. 
You can control comparison behavior of objects of non-built-in\ntypes by defining rich comparison methods like ``__gt__()``, described\nin section *Basic customization*.\n\nComparison of objects of the same type depends on the type:\n\n* Numbers are compared arithmetically.\n\n* The values ``float(\'NaN\')`` and ``Decimal(\'NaN\')`` are special. The\n are identical to themselves, ``x is x`` but are not equal to\n themselves, ``x != x``. Additionally, comparing any value to a\n not-a-number value will return ``False``. For example, both ``3 <\n float(\'NaN\')`` and ``float(\'NaN\') < 3`` will return ``False``.\n\n* Bytes objects are compared lexicographically using the numeric\n values of their elements.\n\n* Strings are compared lexicographically using the numeric equivalents\n (the result of the built-in function ``ord()``) of their characters.\n [3] String and bytes object can\'t be compared!\n\n* Tuples and lists are compared lexicographically using comparison of\n corresponding elements. This means that to compare equal, each\n element must compare equal and the two sequences must be of the same\n type and have the same length.\n\n If not equal, the sequences are ordered the same as their first\n differing elements. For example, ``[1,2,x] <= [1,2,y]`` has the\n same value as ``x <= y``. If the corresponding element does not\n exist, the shorter sequence is ordered first (for example, ``[1,2] <\n [1,2,3]``).\n\n* Mappings (dictionaries) compare equal if and only if they have the\n same ``(key, value)`` pairs. Order comparisons ``(\'<\', \'<=\', \'>=\',\n \'>\')`` raise ``TypeError``.\n\n* Sets and frozensets define comparison operators to mean subset and\n superset tests. Those relations do not define total orderings (the\n two sets ``{1,2}`` and {2,3} are not equal, nor subsets of one\n another, nor supersets of one another). 
Accordingly, sets are not\n appropriate arguments for functions which depend on total ordering.\n For example, ``min()``, ``max()``, and ``sorted()`` produce\n undefined results given a list of sets as inputs.\n\n* Most other objects of built-in types compare unequal unless they are\n the same object; the choice whether one object is considered smaller\n or larger than another one is made arbitrarily but consistently\n within one execution of a program.\n\nComparison of objects of the differing types depends on whether either\nof the types provide explicit support for the comparison. Most\nnumeric types can be compared with one another, but comparisons of\n``float`` and ``Decimal`` are not supported to avoid the inevitable\nconfusion arising from representation issues such as ``float(\'1.1\')``\nbeing inexactly represented and therefore not exactly equal to\n``Decimal(\'1.1\')`` which is. When cross-type comparison is not\nsupported, the comparison method returns ``NotImplemented``. This can\ncreate the illusion of non-transitivity between supported cross-type\ncomparisons and unsupported comparisons. For example, ``Decimal(2) ==\n2`` and ``2 == float(2)`` but ``Decimal(2) != float(2)``.\n\nThe operators ``in`` and ``not in`` test for membership. ``x in s``\nevaluates to true if *x* is a member of *s*, and false otherwise. ``x\nnot in s`` returns the negation of ``x in s``. All built-in sequences\nand set types support this as well as dictionary, for which ``in``\ntests whether a the dictionary has a given key. For container types\nsuch as list, tuple, set, frozenset, dict, or collections.deque, the\nexpression ``x in y`` is equivalent to ``any(x is e or x == e for e in\ny)``.\n\nFor the string and bytes types, ``x in y`` is true if and only if *x*\nis a substring of *y*. 
An equivalent test is ``y.find(x) != -1``.\nEmpty strings are always considered to be a substring of any other\nstring, so ``"" in "abc"`` will return ``True``.\n\nFor user-defined classes which define the ``__contains__()`` method,\n``x in y`` is true if and only if ``y.__contains__(x)`` is true.\n\nFor user-defined classes which do not define ``__contains__()`` but do\ndefine ``__iter__()``, ``x in y`` is true if some value ``z`` with ``x\n== z`` is produced while iterating over ``y``. If an exception is\nraised during the iteration, it is as if ``in`` raised that exception.\n\nLastly, the old-style iteration protocol is tried: if a class defines\n``__getitem__()``, ``x in y`` is true if and only if there is a non-\nnegative integer index *i* such that ``x == y[i]``, and all lower\ninteger indices do not raise ``IndexError`` exception. (If any other\nexception is raised, it is as if ``in`` raised that exception).\n\nThe operator ``not in`` is defined to have the inverse true value of\n``in``.\n\nThe operators ``is`` and ``is not`` test for object identity: ``x is\ny`` is true if and only if *x* and *y* are the same object. ``x is\nnot y`` yields the inverse truth value. [4]\n',
- 'compound': '\nCompound statements\n*******************\n\nCompound statements contain (groups of) other statements; they affect\nor control the execution of those other statements in some way. In\ngeneral, compound statements span multiple lines, although in simple\nincarnations a whole compound statement may be contained in one line.\n\nThe ``if``, ``while`` and ``for`` statements implement traditional\ncontrol flow constructs. ``try`` specifies exception handlers and/or\ncleanup code for a group of statements, while the ``with`` statement\nallows the execution of initialization and finalization code around a\nblock of code. Function and class definitions are also syntactically\ncompound statements.\n\nCompound statements consist of one or more \'clauses.\' A clause\nconsists of a header and a \'suite.\' The clause headers of a\nparticular compound statement are all at the same indentation level.\nEach clause header begins with a uniquely identifying keyword and ends\nwith a colon. A suite is a group of statements controlled by a\nclause. A suite can be one or more semicolon-separated simple\nstatements on the same line as the header, following the header\'s\ncolon, or it can be one or more indented statements on subsequent\nlines. 
Only the latter form of suite can contain nested compound\nstatements; the following is illegal, mostly because it wouldn\'t be\nclear to which ``if`` clause a following ``else`` clause would belong:\n\n if test1: if test2: print(x)\n\nAlso note that the semicolon binds tighter than the colon in this\ncontext, so that in the following example, either all or none of the\n``print()`` calls are executed:\n\n if x < y < z: print(x); print(y); print(z)\n\nSummarizing:\n\n compound_stmt ::= if_stmt\n | while_stmt\n | for_stmt\n | try_stmt\n | with_stmt\n | funcdef\n | classdef\n suite ::= stmt_list NEWLINE | NEWLINE INDENT statement+ DEDENT\n statement ::= stmt_list NEWLINE | compound_stmt\n stmt_list ::= simple_stmt (";" simple_stmt)* [";"]\n\nNote that statements always end in a ``NEWLINE`` possibly followed by\na ``DEDENT``. Also note that optional continuation clauses always\nbegin with a keyword that cannot start a statement, thus there are no\nambiguities (the \'dangling ``else``\' problem is solved in Python by\nrequiring nested ``if`` statements to be indented).\n\nThe formatting of the grammar rules in the following sections places\neach clause on a separate line for clarity.\n\n\nThe ``if`` statement\n====================\n\nThe ``if`` statement is used for conditional execution:\n\n if_stmt ::= "if" expression ":" suite\n ( "elif" expression ":" suite )*\n ["else" ":" suite]\n\nIt selects exactly one of the suites by evaluating the expressions one\nby one until one is found to be true (see section *Boolean operations*\nfor the definition of true and false); then that suite is executed\n(and no other part of the ``if`` statement is executed or evaluated).\nIf all expressions are false, the suite of the ``else`` clause, if\npresent, is executed.\n\n\nThe ``while`` statement\n=======================\n\nThe ``while`` statement is used for repeated execution as long as an\nexpression is true:\n\n while_stmt ::= "while" expression ":" suite\n ["else" ":" 
suite]\n\nThis repeatedly tests the expression and, if it is true, executes the\nfirst suite; if the expression is false (which may be the first time\nit is tested) the suite of the ``else`` clause, if present, is\nexecuted and the loop terminates.\n\nA ``break`` statement executed in the first suite terminates the loop\nwithout executing the ``else`` clause\'s suite. A ``continue``\nstatement executed in the first suite skips the rest of the suite and\ngoes back to testing the expression.\n\n\nThe ``for`` statement\n=====================\n\nThe ``for`` statement is used to iterate over the elements of a\nsequence (such as a string, tuple or list) or other iterable object:\n\n for_stmt ::= "for" target_list "in" expression_list ":" suite\n ["else" ":" suite]\n\nThe expression list is evaluated once; it should yield an iterable\nobject. An iterator is created for the result of the\n``expression_list``. The suite is then executed once for each item\nprovided by the iterator, in the order of ascending indices. Each\nitem in turn is assigned to the target list using the standard rules\nfor assignments (see *Assignment statements*), and then the suite is\nexecuted. When the items are exhausted (which is immediately when the\nsequence is empty or an iterator raises a ``StopIteration``\nexception), the suite in the ``else`` clause, if present, is executed,\nand the loop terminates.\n\nA ``break`` statement executed in the first suite terminates the loop\nwithout executing the ``else`` clause\'s suite. A ``continue``\nstatement executed in the first suite skips the rest of the suite and\ncontinues with the next item, or with the ``else`` clause if there was\nno next item.\n\nThe suite may assign to the variable(s) in the target list; this does\nnot affect the next item assigned to it.\n\nNames in the target list are not deleted when the loop is finished,\nbut if the sequence is empty, it will not have been assigned to at all\nby the loop. 
Hint: the built-in function ``range()`` returns an\niterator of integers suitable to emulate the effect of Pascal\'s ``for\ni := a to b do``; e.g., ``list(range(3))`` returns the list ``[0, 1,\n2]``.\n\nNote: There is a subtlety when the sequence is being modified by the loop\n (this can only occur for mutable sequences, i.e. lists). An\n internal counter is used to keep track of which item is used next,\n and this is incremented on each iteration. When this counter has\n reached the length of the sequence the loop terminates. This means\n that if the suite deletes the current (or a previous) item from the\n sequence, the next item will be skipped (since it gets the index of\n the current item which has already been treated). Likewise, if the\n suite inserts an item in the sequence before the current item, the\n current item will be treated again the next time through the loop.\n This can lead to nasty bugs that can be avoided by making a\n temporary copy using a slice of the whole sequence, e.g.,\n\n for x in a[:]:\n if x < 0: a.remove(x)\n\n\nThe ``try`` statement\n=====================\n\nThe ``try`` statement specifies exception handlers and/or cleanup code\nfor a group of statements:\n\n try_stmt ::= try1_stmt | try2_stmt\n try1_stmt ::= "try" ":" suite\n ("except" [expression ["as" target]] ":" suite)+\n ["else" ":" suite]\n ["finally" ":" suite]\n try2_stmt ::= "try" ":" suite\n "finally" ":" suite\n\nThe ``except`` clause(s) specify one or more exception handlers. When\nno exception occurs in the ``try`` clause, no exception handler is\nexecuted. When an exception occurs in the ``try`` suite, a search for\nan exception handler is started. This search inspects the except\nclauses in turn until one is found that matches the exception. An\nexpression-less except clause, if present, must be last; it matches\nany exception. 
For an except clause with an expression, that\nexpression is evaluated, and the clause matches the exception if the\nresulting object is "compatible" with the exception. An object is\ncompatible with an exception if it is the class or a base class of the\nexception object or a tuple containing an item compatible with the\nexception.\n\nIf no except clause matches the exception, the search for an exception\nhandler continues in the surrounding code and on the invocation stack.\n[1]\n\nIf the evaluation of an expression in the header of an except clause\nraises an exception, the original search for a handler is canceled and\na search starts for the new exception in the surrounding code and on\nthe call stack (it is treated as if the entire ``try`` statement\nraised the exception).\n\nWhen a matching except clause is found, the exception is assigned to\nthe target specified after the ``as`` keyword in that except clause,\nif present, and the except clause\'s suite is executed. All except\nclauses must have an executable block. When the end of this block is\nreached, execution continues normally after the entire try statement.\n(This means that if two nested handlers exist for the same exception,\nand the exception occurs in the try clause of the inner handler, the\nouter handler will not handle the exception.)\n\nWhen an exception has been assigned using ``as target``, it is cleared\nat the end of the except clause. This is as if\n\n except E as N:\n foo\n\nwas translated to\n\n except E as N:\n try:\n foo\n finally:\n del N\n\nThis means the exception must be assigned to a different name to be\nable to refer to it after the except clause. 
Exceptions are cleared\nbecause with the traceback attached to them, they form a reference\ncycle with the stack frame, keeping all locals in that frame alive\nuntil the next garbage collection occurs.\n\nBefore an except clause\'s suite is executed, details about the\nexception are stored in the ``sys`` module and can be access via\n``sys.exc_info()``. ``sys.exc_info()`` returns a 3-tuple consisting of\nthe exception class, the exception instance and a traceback object\n(see section *The standard type hierarchy*) identifying the point in\nthe program where the exception occurred. ``sys.exc_info()`` values\nare restored to their previous values (before the call) when returning\nfrom a function that handled an exception.\n\nThe optional ``else`` clause is executed if and when control flows off\nthe end of the ``try`` clause. [2] Exceptions in the ``else`` clause\nare not handled by the preceding ``except`` clauses.\n\nIf ``finally`` is present, it specifies a \'cleanup\' handler. The\n``try`` clause is executed, including any ``except`` and ``else``\nclauses. If an exception occurs in any of the clauses and is not\nhandled, the exception is temporarily saved. The ``finally`` clause is\nexecuted. If there is a saved exception, it is re-raised at the end\nof the ``finally`` clause. If the ``finally`` clause raises another\nexception or executes a ``return`` or ``break`` statement, the saved\nexception is set as the context of the new exception. The exception\ninformation is not available to the program during execution of the\n``finally`` clause.\n\nWhen a ``return``, ``break`` or ``continue`` statement is executed in\nthe ``try`` suite of a ``try``...``finally`` statement, the\n``finally`` clause is also executed \'on the way out.\' A ``continue``\nstatement is illegal in the ``finally`` clause. 
(The reason is a\nproblem with the current implementation --- this restriction may be\nlifted in the future).\n\nAdditional information on exceptions can be found in section\n*Exceptions*, and information on using the ``raise`` statement to\ngenerate exceptions may be found in section *The raise statement*.\n\n\nThe ``with`` statement\n======================\n\nThe ``with`` statement is used to wrap the execution of a block with\nmethods defined by a context manager (see section *With Statement\nContext Managers*). This allows common\n``try``...``except``...``finally`` usage patterns to be encapsulated\nfor convenient reuse.\n\n with_stmt ::= "with" with_item ("," with_item)* ":" suite\n with_item ::= expression ["as" target]\n\nThe execution of the ``with`` statement with one "item" proceeds as\nfollows:\n\n1. The context expression (the expression given in the ``with_item``)\n is evaluated to obtain a context manager.\n\n2. The context manager\'s ``__exit__()`` is loaded for later use.\n\n3. The context manager\'s ``__enter__()`` method is invoked.\n\n4. If a target was included in the ``with`` statement, the return\n value from ``__enter__()`` is assigned to it.\n\n Note: The ``with`` statement guarantees that if the ``__enter__()``\n method returns without an error, then ``__exit__()`` will always\n be called. Thus, if an error occurs during the assignment to the\n target list, it will be treated the same as an error occurring\n within the suite would be. See step 6 below.\n\n5. The suite is executed.\n\n6. The context manager\'s ``__exit__()`` method is invoked. If an\n exception caused the suite to be exited, its type, value, and\n traceback are passed as arguments to ``__exit__()``. Otherwise,\n three ``None`` arguments are supplied.\n\n If the suite was exited due to an exception, and the return value\n from the ``__exit__()`` method was false, the exception is\n reraised. 
If the return value was true, the exception is\n suppressed, and execution continues with the statement following\n the ``with`` statement.\n\n If the suite was exited for any reason other than an exception, the\n return value from ``__exit__()`` is ignored, and execution proceeds\n at the normal location for the kind of exit that was taken.\n\nWith more than one item, the context managers are processed as if\nmultiple ``with`` statements were nested:\n\n with A() as a, B() as b:\n suite\n\nis equivalent to\n\n with A() as a:\n with B() as b:\n suite\n\nChanged in version 3.1: Support for multiple context expressions.\n\nSee also:\n\n **PEP 0343** - The "with" statement\n The specification, background, and examples for the Python\n ``with`` statement.\n\n\nFunction definitions\n====================\n\nA function definition defines a user-defined function object (see\nsection *The standard type hierarchy*):\n\n funcdef ::= [decorators] "def" funcname "(" [parameter_list] ")" ["->" expression] ":" suite\n decorators ::= decorator+\n decorator ::= "@" dotted_name ["(" [parameter_list [","]] ")"] NEWLINE\n dotted_name ::= identifier ("." identifier)*\n parameter_list ::= (defparameter ",")*\n ( "*" [parameter] ("," defparameter)*\n [, "**" parameter]\n | "**" parameter\n | defparameter [","] )\n parameter ::= identifier [":" expression]\n defparameter ::= parameter ["=" expression]\n funcname ::= identifier\n\nA function definition is an executable statement. Its execution binds\nthe function name in the current local namespace to a function object\n(a wrapper around the executable code for the function). This\nfunction object contains a reference to the current global namespace\nas the global namespace to be used when the function is called.\n\nThe function definition does not execute the function body; this gets\nexecuted only when the function is called. [3]\n\nA function definition may be wrapped by one or more *decorator*\nexpressions. 
Decorator expressions are evaluated when the function is\ndefined, in the scope that contains the function definition. The\nresult must be a callable, which is invoked with the function object\nas the only argument. The returned value is bound to the function name\ninstead of the function object. Multiple decorators are applied in\nnested fashion. For example, the following code\n\n @f1(arg)\n @f2\n def func(): pass\n\nis equivalent to\n\n def func(): pass\n func = f1(arg)(f2(func))\n\nWhen one or more parameters have the form *parameter* ``=``\n*expression*, the function is said to have "default parameter values."\nFor a parameter with a default value, the corresponding argument may\nbe omitted from a call, in which case the parameter\'s default value is\nsubstituted. If a parameter has a default value, all following\nparameters up until the "``*``" must also have a default value ---\nthis is a syntactic restriction that is not expressed by the grammar.\n\n**Default parameter values are evaluated when the function definition\nis executed.** This means that the expression is evaluated once, when\nthe function is defined, and that the same "pre-computed" value is\nused for each call. This is especially important to understand when a\ndefault parameter is a mutable object, such as a list or a dictionary:\nif the function modifies the object (e.g. by appending an item to a\nlist), the default value is in effect modified. This is generally not\nwhat was intended. A way around this is to use ``None`` as the\ndefault, and explicitly test for it in the body of the function, e.g.:\n\n def whats_on_the_telly(penguin=None):\n if penguin is None:\n penguin = []\n penguin.append("property of the zoo")\n return penguin\n\nFunction call semantics are described in more detail in section\n*Calls*. A function call always assigns values to all parameters\nmentioned in the parameter list, either from position arguments, from\nkeyword arguments, or from default values. 
If the form\n"``*identifier``" is present, it is initialized to a tuple receiving\nany excess positional parameters, defaulting to the empty tuple. If\nthe form "``**identifier``" is present, it is initialized to a new\ndictionary receiving any excess keyword arguments, defaulting to a new\nempty dictionary. Parameters after "``*``" or "``*identifier``" are\nkeyword-only parameters and may only be passed using keyword arguments.\n\nParameters may have annotations of the form "``: expression``"\nfollowing the parameter name. Any parameter may have an annotation\neven those of the form ``*identifier`` or ``**identifier``. Functions\nmay have "return" annotation of the form "``-> expression``" after the\nparameter list. These annotations can be any valid Python expression\nand are evaluated when the function definition is executed.\nAnnotations may be evaluated in a different order than they appear in\nthe source code. The presence of annotations does not change the\nsemantics of a function. The annotation values are available as\nvalues of a dictionary keyed by the parameters\' names in the\n``__annotations__`` attribute of the function object.\n\nIt is also possible to create anonymous functions (functions not bound\nto a name), for immediate use in expressions. This uses lambda forms,\ndescribed in section *Lambdas*. Note that the lambda form is merely a\nshorthand for a simplified function definition; a function defined in\na "``def``" statement can be passed around or assigned to another name\njust like a function defined by a lambda form. The "``def``" form is\nactually more powerful since it allows the execution of multiple\nstatements and annotations.\n\n**Programmer\'s note:** Functions are first-class objects. A "``def``"\nform executed inside a function definition defines a local function\nthat can be returned or passed around. Free variables used in the\nnested function can access the local variables of the function\ncontaining the def. 
See section *Naming and binding* for details.\n\n\nClass definitions\n=================\n\nA class definition defines a class object (see section *The standard\ntype hierarchy*):\n\n classdef ::= [decorators] "class" classname [inheritance] ":" suite\n inheritance ::= "(" [parameter_list] ")"\n classname ::= identifier\n\nA class definition is an executable statement. The inheritance list\nusually gives a list of base classes (see *Customizing class creation*\nfor more advanced uses), so each item in the list should evaluate to a\nclass object which allows subclassing. Classes without an inheritance\nlist inherit, by default, from the base class ``object``; hence,\n\n class Foo:\n pass\n\nis equivalent to\n\n class Foo(object):\n pass\n\nThe class\'s suite is then executed in a new execution frame (see\n*Naming and binding*), using a newly created local namespace and the\noriginal global namespace. (Usually, the suite contains mostly\nfunction definitions.) When the class\'s suite finishes execution, its\nexecution frame is discarded but its local namespace is saved. [4] A\nclass object is then created using the inheritance list for the base\nclasses and the saved local namespace for the attribute dictionary.\nThe class name is bound to this class object in the original local\nnamespace.\n\nClass creation can be customized heavily using *metaclasses*.\n\nClasses can also be decorated: just like when decorating functions,\n\n @f1(arg)\n @f2\n class Foo: pass\n\nis equivalent to\n\n class Foo: pass\n Foo = f1(arg)(f2(Foo))\n\nThe evaluation rules for the decorator expressions are the same as for\nfunction decorators. The result must be a class object, which is then\nbound to the class name.\n\n**Programmer\'s note:** Variables defined in the class definition are\nclass attributes; they are shared by instances. Instance attributes\ncan be set in a method with ``self.name = value``. 
Both class and\ninstance attributes are accessible through the notation\n"``self.name``", and an instance attribute hides a class attribute\nwith the same name when accessed in this way. Class attributes can be\nused as defaults for instance attributes, but using mutable values\nthere can lead to unexpected results. *Descriptors* can be used to\ncreate instance variables with different implementation details.\n\nSee also:\n\n **PEP 3115** - Metaclasses in Python 3 **PEP 3129** - Class\n Decorators\n\n-[ Footnotes ]-\n\n[1] The exception is propagated to the invocation stack unless there\n is a ``finally`` clause which happens to raise another exception.\n That new exception causes the old one to be lost.\n\n[2] Currently, control "flows off the end" except in the case of an\n exception or the execution of a ``return``, ``continue``, or\n ``break`` statement.\n\n[3] A string literal appearing as the first statement in the function\n body is transformed into the function\'s ``__doc__`` attribute and\n therefore the function\'s *docstring*.\n\n[4] A string literal appearing as the first statement in the class\n body is transformed into the namespace\'s ``__doc__`` item and\n therefore the class\'s *docstring*.\n',
+ 'compound': '\nCompound statements\n*******************\n\nCompound statements contain (groups of) other statements; they affect\nor control the execution of those other statements in some way. In\ngeneral, compound statements span multiple lines, although in simple\nincarnations a whole compound statement may be contained in one line.\n\nThe ``if``, ``while`` and ``for`` statements implement traditional\ncontrol flow constructs. ``try`` specifies exception handlers and/or\ncleanup code for a group of statements, while the ``with`` statement\nallows the execution of initialization and finalization code around a\nblock of code. Function and class definitions are also syntactically\ncompound statements.\n\nCompound statements consist of one or more \'clauses.\' A clause\nconsists of a header and a \'suite.\' The clause headers of a\nparticular compound statement are all at the same indentation level.\nEach clause header begins with a uniquely identifying keyword and ends\nwith a colon. A suite is a group of statements controlled by a\nclause. A suite can be one or more semicolon-separated simple\nstatements on the same line as the header, following the header\'s\ncolon, or it can be one or more indented statements on subsequent\nlines. 
Only the latter form of suite can contain nested compound\nstatements; the following is illegal, mostly because it wouldn\'t be\nclear to which ``if`` clause a following ``else`` clause would belong:\n\n if test1: if test2: print(x)\n\nAlso note that the semicolon binds tighter than the colon in this\ncontext, so that in the following example, either all or none of the\n``print()`` calls are executed:\n\n if x < y < z: print(x); print(y); print(z)\n\nSummarizing:\n\n compound_stmt ::= if_stmt\n | while_stmt\n | for_stmt\n | try_stmt\n | with_stmt\n | funcdef\n | classdef\n suite ::= stmt_list NEWLINE | NEWLINE INDENT statement+ DEDENT\n statement ::= stmt_list NEWLINE | compound_stmt\n stmt_list ::= simple_stmt (";" simple_stmt)* [";"]\n\nNote that statements always end in a ``NEWLINE`` possibly followed by\na ``DEDENT``. Also note that optional continuation clauses always\nbegin with a keyword that cannot start a statement, thus there are no\nambiguities (the \'dangling ``else``\' problem is solved in Python by\nrequiring nested ``if`` statements to be indented).\n\nThe formatting of the grammar rules in the following sections places\neach clause on a separate line for clarity.\n\n\nThe ``if`` statement\n====================\n\nThe ``if`` statement is used for conditional execution:\n\n if_stmt ::= "if" expression ":" suite\n ( "elif" expression ":" suite )*\n ["else" ":" suite]\n\nIt selects exactly one of the suites by evaluating the expressions one\nby one until one is found to be true (see section *Boolean operations*\nfor the definition of true and false); then that suite is executed\n(and no other part of the ``if`` statement is executed or evaluated).\nIf all expressions are false, the suite of the ``else`` clause, if\npresent, is executed.\n\n\nThe ``while`` statement\n=======================\n\nThe ``while`` statement is used for repeated execution as long as an\nexpression is true:\n\n while_stmt ::= "while" expression ":" suite\n ["else" ":" 
suite]\n\nThis repeatedly tests the expression and, if it is true, executes the\nfirst suite; if the expression is false (which may be the first time\nit is tested) the suite of the ``else`` clause, if present, is\nexecuted and the loop terminates.\n\nA ``break`` statement executed in the first suite terminates the loop\nwithout executing the ``else`` clause\'s suite. A ``continue``\nstatement executed in the first suite skips the rest of the suite and\ngoes back to testing the expression.\n\n\nThe ``for`` statement\n=====================\n\nThe ``for`` statement is used to iterate over the elements of a\nsequence (such as a string, tuple or list) or other iterable object:\n\n for_stmt ::= "for" target_list "in" expression_list ":" suite\n ["else" ":" suite]\n\nThe expression list is evaluated once; it should yield an iterable\nobject. An iterator is created for the result of the\n``expression_list``. The suite is then executed once for each item\nprovided by the iterator, in the order of ascending indices. Each\nitem in turn is assigned to the target list using the standard rules\nfor assignments (see *Assignment statements*), and then the suite is\nexecuted. When the items are exhausted (which is immediately when the\nsequence is empty or an iterator raises a ``StopIteration``\nexception), the suite in the ``else`` clause, if present, is executed,\nand the loop terminates.\n\nA ``break`` statement executed in the first suite terminates the loop\nwithout executing the ``else`` clause\'s suite. A ``continue``\nstatement executed in the first suite skips the rest of the suite and\ncontinues with the next item, or with the ``else`` clause if there was\nno next item.\n\nThe suite may assign to the variable(s) in the target list; this does\nnot affect the next item assigned to it.\n\nNames in the target list are not deleted when the loop is finished,\nbut if the sequence is empty, it will not have been assigned to at all\nby the loop. 
Hint: the built-in function ``range()`` returns an\niterator of integers suitable to emulate the effect of Pascal\'s ``for\ni := a to b do``; e.g., ``list(range(3))`` returns the list ``[0, 1,\n2]``.\n\nNote: There is a subtlety when the sequence is being modified by the loop\n (this can only occur for mutable sequences, i.e. lists). An\n internal counter is used to keep track of which item is used next,\n and this is incremented on each iteration. When this counter has\n reached the length of the sequence the loop terminates. This means\n that if the suite deletes the current (or a previous) item from the\n sequence, the next item will be skipped (since it gets the index of\n the current item which has already been treated). Likewise, if the\n suite inserts an item in the sequence before the current item, the\n current item will be treated again the next time through the loop.\n This can lead to nasty bugs that can be avoided by making a\n temporary copy using a slice of the whole sequence, e.g.,\n\n for x in a[:]:\n if x < 0: a.remove(x)\n\n\nThe ``try`` statement\n=====================\n\nThe ``try`` statement specifies exception handlers and/or cleanup code\nfor a group of statements:\n\n try_stmt ::= try1_stmt | try2_stmt\n try1_stmt ::= "try" ":" suite\n ("except" [expression ["as" target]] ":" suite)+\n ["else" ":" suite]\n ["finally" ":" suite]\n try2_stmt ::= "try" ":" suite\n "finally" ":" suite\n\nThe ``except`` clause(s) specify one or more exception handlers. When\nno exception occurs in the ``try`` clause, no exception handler is\nexecuted. When an exception occurs in the ``try`` suite, a search for\nan exception handler is started. This search inspects the except\nclauses in turn until one is found that matches the exception. An\nexpression-less except clause, if present, must be last; it matches\nany exception. 
For an except clause with an expression, that\nexpression is evaluated, and the clause matches the exception if the\nresulting object is "compatible" with the exception. An object is\ncompatible with an exception if it is the class or a base class of the\nexception object or a tuple containing an item compatible with the\nexception.\n\nIf no except clause matches the exception, the search for an exception\nhandler continues in the surrounding code and on the invocation stack.\n[1]\n\nIf the evaluation of an expression in the header of an except clause\nraises an exception, the original search for a handler is canceled and\na search starts for the new exception in the surrounding code and on\nthe call stack (it is treated as if the entire ``try`` statement\nraised the exception).\n\nWhen a matching except clause is found, the exception is assigned to\nthe target specified after the ``as`` keyword in that except clause,\nif present, and the except clause\'s suite is executed. All except\nclauses must have an executable block. When the end of this block is\nreached, execution continues normally after the entire try statement.\n(This means that if two nested handlers exist for the same exception,\nand the exception occurs in the try clause of the inner handler, the\nouter handler will not handle the exception.)\n\nWhen an exception has been assigned using ``as target``, it is cleared\nat the end of the except clause. This is as if\n\n except E as N:\n foo\n\nwas translated to\n\n except E as N:\n try:\n foo\n finally:\n del N\n\nThis means the exception must be assigned to a different name to be\nable to refer to it after the except clause. 
Exceptions are cleared\nbecause with the traceback attached to them, they form a reference\ncycle with the stack frame, keeping all locals in that frame alive\nuntil the next garbage collection occurs.\n\nBefore an except clause\'s suite is executed, details about the\nexception are stored in the ``sys`` module and can be accessed via\n``sys.exc_info()``. ``sys.exc_info()`` returns a 3-tuple consisting of\nthe exception class, the exception instance and a traceback object\n(see section *The standard type hierarchy*) identifying the point in\nthe program where the exception occurred. ``sys.exc_info()`` values\nare restored to their previous values (before the call) when returning\nfrom a function that handled an exception.\n\nThe optional ``else`` clause is executed if and when control flows off\nthe end of the ``try`` clause. [2] Exceptions in the ``else`` clause\nare not handled by the preceding ``except`` clauses.\n\nIf ``finally`` is present, it specifies a \'cleanup\' handler. The\n``try`` clause is executed, including any ``except`` and ``else``\nclauses. If an exception occurs in any of the clauses and is not\nhandled, the exception is temporarily saved. The ``finally`` clause is\nexecuted. If there is a saved exception or ``break`` statement, it is\nre-raised at the end of the ``finally`` clause. If the ``finally``\nclause raises another exception the saved exception is set as the\ncontext of the new exception; if the ``finally`` clause executes a\n``return`` statement, the saved exception is discarded:\n\n def f():\n try:\n 1/0\n finally:\n return 42\n\n >>> f()\n 42\n\nThe exception information is not available to the program during\nexecution of the ``finally`` clause.\n\nWhen a ``return``, ``break`` or ``continue`` statement is executed in\nthe ``try`` suite of a ``try``...``finally`` statement, the\n``finally`` clause is also executed \'on the way out.\' A ``continue``\nstatement is illegal in the ``finally`` clause. 
(The reason is a\nproblem with the current implementation --- this restriction may be\nlifted in the future).\n\nAdditional information on exceptions can be found in section\n*Exceptions*, and information on using the ``raise`` statement to\ngenerate exceptions may be found in section *The raise statement*.\n\n\nThe ``with`` statement\n======================\n\nThe ``with`` statement is used to wrap the execution of a block with\nmethods defined by a context manager (see section *With Statement\nContext Managers*). This allows common\n``try``...``except``...``finally`` usage patterns to be encapsulated\nfor convenient reuse.\n\n with_stmt ::= "with" with_item ("," with_item)* ":" suite\n with_item ::= expression ["as" target]\n\nThe execution of the ``with`` statement with one "item" proceeds as\nfollows:\n\n1. The context expression (the expression given in the ``with_item``)\n is evaluated to obtain a context manager.\n\n2. The context manager\'s ``__exit__()`` is loaded for later use.\n\n3. The context manager\'s ``__enter__()`` method is invoked.\n\n4. If a target was included in the ``with`` statement, the return\n value from ``__enter__()`` is assigned to it.\n\n Note: The ``with`` statement guarantees that if the ``__enter__()``\n method returns without an error, then ``__exit__()`` will always\n be called. Thus, if an error occurs during the assignment to the\n target list, it will be treated the same as an error occurring\n within the suite would be. See step 6 below.\n\n5. The suite is executed.\n\n6. The context manager\'s ``__exit__()`` method is invoked. If an\n exception caused the suite to be exited, its type, value, and\n traceback are passed as arguments to ``__exit__()``. Otherwise,\n three ``None`` arguments are supplied.\n\n If the suite was exited due to an exception, and the return value\n from the ``__exit__()`` method was false, the exception is\n reraised. 
If the return value was true, the exception is\n suppressed, and execution continues with the statement following\n the ``with`` statement.\n\n If the suite was exited for any reason other than an exception, the\n return value from ``__exit__()`` is ignored, and execution proceeds\n at the normal location for the kind of exit that was taken.\n\nWith more than one item, the context managers are processed as if\nmultiple ``with`` statements were nested:\n\n with A() as a, B() as b:\n suite\n\nis equivalent to\n\n with A() as a:\n with B() as b:\n suite\n\nChanged in version 3.1: Support for multiple context expressions.\n\nSee also:\n\n **PEP 0343** - The "with" statement\n The specification, background, and examples for the Python\n ``with`` statement.\n\n\nFunction definitions\n====================\n\nA function definition defines a user-defined function object (see\nsection *The standard type hierarchy*):\n\n funcdef ::= [decorators] "def" funcname "(" [parameter_list] ")" ["->" expression] ":" suite\n decorators ::= decorator+\n decorator ::= "@" dotted_name ["(" [parameter_list [","]] ")"] NEWLINE\n dotted_name ::= identifier ("." identifier)*\n parameter_list ::= (defparameter ",")*\n ( "*" [parameter] ("," defparameter)*\n [, "**" parameter]\n | "**" parameter\n | defparameter [","] )\n parameter ::= identifier [":" expression]\n defparameter ::= parameter ["=" expression]\n funcname ::= identifier\n\nA function definition is an executable statement. Its execution binds\nthe function name in the current local namespace to a function object\n(a wrapper around the executable code for the function). This\nfunction object contains a reference to the current global namespace\nas the global namespace to be used when the function is called.\n\nThe function definition does not execute the function body; this gets\nexecuted only when the function is called. [3]\n\nA function definition may be wrapped by one or more *decorator*\nexpressions. 
Decorator expressions are evaluated when the function is\ndefined, in the scope that contains the function definition. The\nresult must be a callable, which is invoked with the function object\nas the only argument. The returned value is bound to the function name\ninstead of the function object. Multiple decorators are applied in\nnested fashion. For example, the following code\n\n @f1(arg)\n @f2\n def func(): pass\n\nis equivalent to\n\n def func(): pass\n func = f1(arg)(f2(func))\n\nWhen one or more parameters have the form *parameter* ``=``\n*expression*, the function is said to have "default parameter values."\nFor a parameter with a default value, the corresponding argument may\nbe omitted from a call, in which case the parameter\'s default value is\nsubstituted. If a parameter has a default value, all following\nparameters up until the "``*``" must also have a default value ---\nthis is a syntactic restriction that is not expressed by the grammar.\n\n**Default parameter values are evaluated when the function definition\nis executed.** This means that the expression is evaluated once, when\nthe function is defined, and that the same "pre-computed" value is\nused for each call. This is especially important to understand when a\ndefault parameter is a mutable object, such as a list or a dictionary:\nif the function modifies the object (e.g. by appending an item to a\nlist), the default value is in effect modified. This is generally not\nwhat was intended. A way around this is to use ``None`` as the\ndefault, and explicitly test for it in the body of the function, e.g.:\n\n def whats_on_the_telly(penguin=None):\n if penguin is None:\n penguin = []\n penguin.append("property of the zoo")\n return penguin\n\nFunction call semantics are described in more detail in section\n*Calls*. A function call always assigns values to all parameters\nmentioned in the parameter list, either from position arguments, from\nkeyword arguments, or from default values. 
If the form\n"``*identifier``" is present, it is initialized to a tuple receiving\nany excess positional parameters, defaulting to the empty tuple. If\nthe form "``**identifier``" is present, it is initialized to a new\ndictionary receiving any excess keyword arguments, defaulting to a new\nempty dictionary. Parameters after "``*``" or "``*identifier``" are\nkeyword-only parameters and may only be passed using keyword arguments.\n\nParameters may have annotations of the form "``: expression``"\nfollowing the parameter name. Any parameter may have an annotation\neven those of the form ``*identifier`` or ``**identifier``. Functions\nmay have "return" annotation of the form "``-> expression``" after the\nparameter list. These annotations can be any valid Python expression\nand are evaluated when the function definition is executed.\nAnnotations may be evaluated in a different order than they appear in\nthe source code. The presence of annotations does not change the\nsemantics of a function. The annotation values are available as\nvalues of a dictionary keyed by the parameters\' names in the\n``__annotations__`` attribute of the function object.\n\nIt is also possible to create anonymous functions (functions not bound\nto a name), for immediate use in expressions. This uses lambda forms,\ndescribed in section *Lambdas*. Note that the lambda form is merely a\nshorthand for a simplified function definition; a function defined in\na "``def``" statement can be passed around or assigned to another name\njust like a function defined by a lambda form. The "``def``" form is\nactually more powerful since it allows the execution of multiple\nstatements and annotations.\n\n**Programmer\'s note:** Functions are first-class objects. A "``def``"\nform executed inside a function definition defines a local function\nthat can be returned or passed around. Free variables used in the\nnested function can access the local variables of the function\ncontaining the def. 
See section *Naming and binding* for details.\n\nSee also:\n\n **PEP 3107** - Function Annotations\n The original specification for function annotations.\n\n\nClass definitions\n=================\n\nA class definition defines a class object (see section *The standard\ntype hierarchy*):\n\n classdef ::= [decorators] "class" classname [inheritance] ":" suite\n inheritance ::= "(" [parameter_list] ")"\n classname ::= identifier\n\nA class definition is an executable statement. The inheritance list\nusually gives a list of base classes (see *Customizing class creation*\nfor more advanced uses), so each item in the list should evaluate to a\nclass object which allows subclassing. Classes without an inheritance\nlist inherit, by default, from the base class ``object``; hence,\n\n class Foo:\n pass\n\nis equivalent to\n\n class Foo(object):\n pass\n\nThe class\'s suite is then executed in a new execution frame (see\n*Naming and binding*), using a newly created local namespace and the\noriginal global namespace. (Usually, the suite contains mostly\nfunction definitions.) When the class\'s suite finishes execution, its\nexecution frame is discarded but its local namespace is saved. [4] A\nclass object is then created using the inheritance list for the base\nclasses and the saved local namespace for the attribute dictionary.\nThe class name is bound to this class object in the original local\nnamespace.\n\nClass creation can be customized heavily using *metaclasses*.\n\nClasses can also be decorated: just like when decorating functions,\n\n @f1(arg)\n @f2\n class Foo: pass\n\nis equivalent to\n\n class Foo: pass\n Foo = f1(arg)(f2(Foo))\n\nThe evaluation rules for the decorator expressions are the same as for\nfunction decorators. The result must be a class object, which is then\nbound to the class name.\n\n**Programmer\'s note:** Variables defined in the class definition are\nclass attributes; they are shared by instances. 
Instance attributes\ncan be set in a method with ``self.name = value``. Both class and\ninstance attributes are accessible through the notation\n"``self.name``", and an instance attribute hides a class attribute\nwith the same name when accessed in this way. Class attributes can be\nused as defaults for instance attributes, but using mutable values\nthere can lead to unexpected results. *Descriptors* can be used to\ncreate instance variables with different implementation details.\n\nSee also:\n\n **PEP 3115** - Metaclasses in Python 3 **PEP 3129** - Class\n Decorators\n\n-[ Footnotes ]-\n\n[1] The exception is propagated to the invocation stack unless there\n is a ``finally`` clause which happens to raise another exception.\n That new exception causes the old one to be lost.\n\n[2] Currently, control "flows off the end" except in the case of an\n exception or the execution of a ``return``, ``continue``, or\n ``break`` statement.\n\n[3] A string literal appearing as the first statement in the function\n body is transformed into the function\'s ``__doc__`` attribute and\n therefore the function\'s *docstring*.\n\n[4] A string literal appearing as the first statement in the class\n body is transformed into the namespace\'s ``__doc__`` item and\n therefore the class\'s *docstring*.\n',
'context-managers': '\nWith Statement Context Managers\n*******************************\n\nA *context manager* is an object that defines the runtime context to\nbe established when executing a ``with`` statement. The context\nmanager handles the entry into, and the exit from, the desired runtime\ncontext for the execution of the block of code. Context managers are\nnormally invoked using the ``with`` statement (described in section\n*The with statement*), but can also be used by directly invoking their\nmethods.\n\nTypical uses of context managers include saving and restoring various\nkinds of global state, locking and unlocking resources, closing opened\nfiles, etc.\n\nFor more information on context managers, see *Context Manager Types*.\n\nobject.__enter__(self)\n\n Enter the runtime context related to this object. The ``with``\n statement will bind this method\'s return value to the target(s)\n specified in the ``as`` clause of the statement, if any.\n\nobject.__exit__(self, exc_type, exc_value, traceback)\n\n Exit the runtime context related to this object. The parameters\n describe the exception that caused the context to be exited. If the\n context was exited without an exception, all three arguments will\n be ``None``.\n\n If an exception is supplied, and the method wishes to suppress the\n exception (i.e., prevent it from being propagated), it should\n return a true value. Otherwise, the exception will be processed\n normally upon exit from this method.\n\n Note that ``__exit__()`` methods should not reraise the passed-in\n exception; this is the caller\'s responsibility.\n\nSee also:\n\n **PEP 0343** - The "with" statement\n The specification, background, and examples for the Python\n ``with`` statement.\n',
'continue': '\nThe ``continue`` statement\n**************************\n\n continue_stmt ::= "continue"\n\n``continue`` may only occur syntactically nested in a ``for`` or\n``while`` loop, but not nested in a function or class definition or\n``finally`` clause within that loop. It continues with the next cycle\nof the nearest enclosing loop.\n\nWhen ``continue`` passes control out of a ``try`` statement with a\n``finally`` clause, that ``finally`` clause is executed before really\nstarting the next loop cycle.\n',
'conversions': '\nArithmetic conversions\n**********************\n\nWhen a description of an arithmetic operator below uses the phrase\n"the numeric arguments are converted to a common type," this means\nthat the operator implementation for built-in types works that way:\n\n* If either argument is a complex number, the other is converted to\n complex;\n\n* otherwise, if either argument is a floating point number, the other\n is converted to floating point;\n\n* otherwise, both must be integers and no conversion is necessary.\n\nSome additional rules apply for certain operators (e.g., a string left\nargument to the \'%\' operator). Extensions must define their own\nconversion behavior.\n',
- 'customization': '\nBasic customization\n*******************\n\nobject.__new__(cls[, ...])\n\n Called to create a new instance of class *cls*. ``__new__()`` is a\n static method (special-cased so you need not declare it as such)\n that takes the class of which an instance was requested as its\n first argument. The remaining arguments are those passed to the\n object constructor expression (the call to the class). The return\n value of ``__new__()`` should be the new object instance (usually\n an instance of *cls*).\n\n Typical implementations create a new instance of the class by\n invoking the superclass\'s ``__new__()`` method using\n ``super(currentclass, cls).__new__(cls[, ...])`` with appropriate\n arguments and then modifying the newly-created instance as\n necessary before returning it.\n\n If ``__new__()`` returns an instance of *cls*, then the new\n instance\'s ``__init__()`` method will be invoked like\n ``__init__(self[, ...])``, where *self* is the new instance and the\n remaining arguments are the same as were passed to ``__new__()``.\n\n If ``__new__()`` does not return an instance of *cls*, then the new\n instance\'s ``__init__()`` method will not be invoked.\n\n ``__new__()`` is intended mainly to allow subclasses of immutable\n types (like int, str, or tuple) to customize instance creation. It\n is also commonly overridden in custom metaclasses in order to\n customize class creation.\n\nobject.__init__(self[, ...])\n\n Called when the instance is created. The arguments are those\n passed to the class constructor expression. If a base class has an\n ``__init__()`` method, the derived class\'s ``__init__()`` method,\n if any, must explicitly call it to ensure proper initialization of\n the base class part of the instance; for example:\n ``BaseClass.__init__(self, [args...])``. 
As a special constraint\n on constructors, no value may be returned; doing so will cause a\n ``TypeError`` to be raised at runtime.\n\nobject.__del__(self)\n\n Called when the instance is about to be destroyed. This is also\n called a destructor. If a base class has a ``__del__()`` method,\n the derived class\'s ``__del__()`` method, if any, must explicitly\n call it to ensure proper deletion of the base class part of the\n instance. Note that it is possible (though not recommended!) for\n the ``__del__()`` method to postpone destruction of the instance by\n creating a new reference to it. It may then be called at a later\n time when this new reference is deleted. It is not guaranteed that\n ``__del__()`` methods are called for objects that still exist when\n the interpreter exits.\n\n Note: ``del x`` doesn\'t directly call ``x.__del__()`` --- the former\n decrements the reference count for ``x`` by one, and the latter\n is only called when ``x``\'s reference count reaches zero. Some\n common situations that may prevent the reference count of an\n object from going to zero include: circular references between\n objects (e.g., a doubly-linked list or a tree data structure with\n parent and child pointers); a reference to the object on the\n stack frame of a function that caught an exception (the traceback\n stored in ``sys.exc_info()[2]`` keeps the stack frame alive); or\n a reference to the object on the stack frame that raised an\n unhandled exception in interactive mode (the traceback stored in\n ``sys.last_traceback`` keeps the stack frame alive). The first\n situation can only be remedied by explicitly breaking the cycles;\n the latter two situations can be resolved by storing ``None`` in\n ``sys.last_traceback``. Circular references which are garbage are\n detected when the optional cycle detector is enabled (it\'s on by\n default), but can only be cleaned up if there are no Python-\n level ``__del__()`` methods involved. 
Refer to the documentation\n for the ``gc`` module for more information about how\n ``__del__()`` methods are handled by the cycle detector,\n particularly the description of the ``garbage`` value.\n\n Warning: Due to the precarious circumstances under which ``__del__()``\n methods are invoked, exceptions that occur during their execution\n are ignored, and a warning is printed to ``sys.stderr`` instead.\n Also, when ``__del__()`` is invoked in response to a module being\n deleted (e.g., when execution of the program is done), other\n globals referenced by the ``__del__()`` method may already have\n been deleted or in the process of being torn down (e.g. the\n import machinery shutting down). For this reason, ``__del__()``\n methods should do the absolute minimum needed to maintain\n external invariants. Starting with version 1.5, Python\n guarantees that globals whose name begins with a single\n underscore are deleted from their module before other globals are\n deleted; if no other references to such globals exist, this may\n help in assuring that imported modules are still available at the\n time when the ``__del__()`` method is called.\n\nobject.__repr__(self)\n\n Called by the ``repr()`` built-in function to compute the\n "official" string representation of an object. If at all possible,\n this should look like a valid Python expression that could be used\n to recreate an object with the same value (given an appropriate\n environment). If this is not possible, a string of the form\n ``<...some useful description...>`` should be returned. The return\n value must be a string object. 
If a class defines ``__repr__()``\n but not ``__str__()``, then ``__repr__()`` is also used when an\n "informal" string representation of instances of that class is\n required.\n\n This is typically used for debugging, so it is important that the\n representation is information-rich and unambiguous.\n\nobject.__str__(self)\n\n Called by the ``str()`` built-in function and by the ``print()``\n function to compute the "informal" string representation of an\n object. This differs from ``__repr__()`` in that it does not have\n to be a valid Python expression: a more convenient or concise\n representation may be used instead. The return value must be a\n string object.\n\nobject.__bytes__(self)\n\n Called by ``bytes()`` to compute a byte-string representation of an\n object. This should return a ``bytes`` object.\n\nobject.__format__(self, format_spec)\n\n Called by the ``format()`` built-in function (and by extension, the\n ``format()`` method of class ``str``) to produce a "formatted"\n string representation of an object. The ``format_spec`` argument is\n a string that contains a description of the formatting options\n desired. The interpretation of the ``format_spec`` argument is up\n to the type implementing ``__format__()``, however most classes\n will either delegate formatting to one of the built-in types, or\n use a similar formatting option syntax.\n\n See *Format Specification Mini-Language* for a description of the\n standard formatting syntax.\n\n The return value must be a string object.\n\nobject.__lt__(self, other)\nobject.__le__(self, other)\nobject.__eq__(self, other)\nobject.__ne__(self, other)\nobject.__gt__(self, other)\nobject.__ge__(self, other)\n\n These are the so-called "rich comparison" methods. 
The\n correspondence between operator symbols and method names is as\n follows: ``x<y`` calls ``x.__lt__(y)``, ``x<=y`` calls\n ``x.__le__(y)``, ``x==y`` calls ``x.__eq__(y)``, ``x!=y`` calls\n ``x.__ne__(y)``, ``x>y`` calls ``x.__gt__(y)``, and ``x>=y`` calls\n ``x.__ge__(y)``.\n\n A rich comparison method may return the singleton\n ``NotImplemented`` if it does not implement the operation for a\n given pair of arguments. By convention, ``False`` and ``True`` are\n returned for a successful comparison. However, these methods can\n return any value, so if the comparison operator is used in a\n Boolean context (e.g., in the condition of an ``if`` statement),\n Python will call ``bool()`` on the value to determine if the result\n is true or false.\n\n There are no implied relationships among the comparison operators.\n The truth of ``x==y`` does not imply that ``x!=y`` is false.\n Accordingly, when defining ``__eq__()``, one should also define\n ``__ne__()`` so that the operators will behave as expected. See\n the paragraph on ``__hash__()`` for some important notes on\n creating *hashable* objects which support custom comparison\n operations and are usable as dictionary keys.\n\n There are no swapped-argument versions of these methods (to be used\n when the left argument does not support the operation but the right\n argument does); rather, ``__lt__()`` and ``__gt__()`` are each\n other\'s reflection, ``__le__()`` and ``__ge__()`` are each other\'s\n reflection, and ``__eq__()`` and ``__ne__()`` are their own\n reflection.\n\n Arguments to rich comparison methods are never coerced.\n\n To automatically generate ordering operations from a single root\n operation, see ``functools.total_ordering()``.\n\nobject.__hash__(self)\n\n Called by built-in function ``hash()`` and for operations on\n members of hashed collections including ``set``, ``frozenset``, and\n ``dict``. ``__hash__()`` should return an integer. 
The only\n required property is that objects which compare equal have the same\n hash value; it is advised to somehow mix together (e.g. using\n exclusive or) the hash values for the components of the object that\n also play a part in comparison of objects.\n\n If a class does not define an ``__eq__()`` method it should not\n define a ``__hash__()`` operation either; if it defines\n ``__eq__()`` but not ``__hash__()``, its instances will not be\n usable as items in hashable collections. If a class defines\n mutable objects and implements an ``__eq__()`` method, it should\n not implement ``__hash__()``, since the implementation of hashable\n collections requires that a key\'s hash value is immutable (if the\n object\'s hash value changes, it will be in the wrong hash bucket).\n\n User-defined classes have ``__eq__()`` and ``__hash__()`` methods\n by default; with them, all objects compare unequal (except with\n themselves) and ``x.__hash__()`` returns ``id(x)``.\n\n Classes which inherit a ``__hash__()`` method from a parent class\n but change the meaning of ``__eq__()`` such that the hash value\n returned is no longer appropriate (e.g. by switching to a value-\n based concept of equality instead of the default identity based\n equality) can explicitly flag themselves as being unhashable by\n setting ``__hash__ = None`` in the class definition. Doing so means\n that not only will instances of the class raise an appropriate\n ``TypeError`` when a program attempts to retrieve their hash value,\n but they will also be correctly identified as unhashable when\n checking ``isinstance(obj, collections.Hashable)`` (unlike classes\n which define their own ``__hash__()`` to explicitly raise\n ``TypeError``).\n\n If a class that overrides ``__eq__()`` needs to retain the\n implementation of ``__hash__()`` from a parent class, the\n interpreter must be told this explicitly by setting ``__hash__ =\n <ParentClass>.__hash__``. 
Otherwise the inheritance of\n ``__hash__()`` will be blocked, just as if ``__hash__`` had been\n explicitly set to ``None``.\n\n See also the *-R* command-line option.\n\nobject.__bool__(self)\n\n Called to implement truth value testing and the built-in operation\n ``bool()``; should return ``False`` or ``True``. When this method\n is not defined, ``__len__()`` is called, if it is defined, and the\n object is considered true if its result is nonzero. If a class\n defines neither ``__len__()`` nor ``__bool__()``, all its instances\n are considered true.\n',
- 'debugger': '\n``pdb`` --- The Python Debugger\n*******************************\n\nThe module ``pdb`` defines an interactive source code debugger for\nPython programs. It supports setting (conditional) breakpoints and\nsingle stepping at the source line level, inspection of stack frames,\nsource code listing, and evaluation of arbitrary Python code in the\ncontext of any stack frame. It also supports post-mortem debugging\nand can be called under program control.\n\nThe debugger is extensible -- it is actually defined as the class\n``Pdb``. This is currently undocumented but easily understood by\nreading the source. The extension interface uses the modules ``bdb``\nand ``cmd``.\n\nThe debugger\'s prompt is ``(Pdb)``. Typical usage to run a program\nunder control of the debugger is:\n\n >>> import pdb\n >>> import mymodule\n >>> pdb.run(\'mymodule.test()\')\n > <string>(0)?()\n (Pdb) continue\n > <string>(1)?()\n (Pdb) continue\n NameError: \'spam\'\n > <string>(1)?()\n (Pdb)\n\n``pdb.py`` can also be invoked as a script to debug other scripts.\nFor example:\n\n python3 -m pdb myscript.py\n\nWhen invoked as a script, pdb will automatically enter post-mortem\ndebugging if the program being debugged exits abnormally. After post-\nmortem debugging (or after normal exit of the program), pdb will\nrestart the program. Automatic restarting preserves pdb\'s state (such\nas breakpoints) and in most cases is more useful than quitting the\ndebugger upon program\'s exit.\n\nNew in version 3.2: ``pdb.py`` now accepts a ``-c`` option that\nexecutes commands as if given in a ``.pdbrc`` file, see *Debugger\nCommands*.\n\nThe typical usage to break into the debugger from a running program is\nto insert\n\n import pdb; pdb.set_trace()\n\nat the location you want to break into the debugger. 
You can then\nstep through the code following this statement, and continue running\nwithout the debugger using the ``continue`` command.\n\nThe typical usage to inspect a crashed program is:\n\n >>> import pdb\n >>> import mymodule\n >>> mymodule.test()\n Traceback (most recent call last):\n File "<stdin>", line 1, in ?\n File "./mymodule.py", line 4, in test\n test2()\n File "./mymodule.py", line 3, in test2\n print(spam)\n NameError: spam\n >>> pdb.pm()\n > ./mymodule.py(3)test2()\n -> print(spam)\n (Pdb)\n\nThe module defines the following functions; each enters the debugger\nin a slightly different way:\n\npdb.run(statement, globals=None, locals=None)\n\n Execute the *statement* (given as a string or a code object) under\n debugger control. The debugger prompt appears before any code is\n executed; you can set breakpoints and type ``continue``, or you can\n step through the statement using ``step`` or ``next`` (all these\n commands are explained below). The optional *globals* and *locals*\n arguments specify the environment in which the code is executed; by\n default the dictionary of the module ``__main__`` is used. (See\n the explanation of the built-in ``exec()`` or ``eval()``\n functions.)\n\npdb.runeval(expression, globals=None, locals=None)\n\n Evaluate the *expression* (given as a string or a code object)\n under debugger control. When ``runeval()`` returns, it returns the\n value of the expression. Otherwise this function is similar to\n ``run()``.\n\npdb.runcall(function, *args, **kwds)\n\n Call the *function* (a function or method object, not a string)\n with the given arguments. When ``runcall()`` returns, it returns\n whatever the function call returned. The debugger prompt appears\n as soon as the function is entered.\n\npdb.set_trace()\n\n Enter the debugger at the calling stack frame. This is useful to\n hard-code a breakpoint at a given point in a program, even if the\n code is not otherwise being debugged (e.g. 
when an assertion\n fails).\n\npdb.post_mortem(traceback=None)\n\n Enter post-mortem debugging of the given *traceback* object. If no\n *traceback* is given, it uses the one of the exception that is\n currently being handled (an exception must be being handled if the\n default is to be used).\n\npdb.pm()\n\n Enter post-mortem debugging of the traceback found in\n ``sys.last_traceback``.\n\nThe ``run*`` functions and ``set_trace()`` are aliases for\ninstantiating the ``Pdb`` class and calling the method of the same\nname. If you want to access further features, you have to do this\nyourself:\n\nclass pdb.Pdb(completekey=\'tab\', stdin=None, stdout=None, skip=None, nosigint=False)\n\n ``Pdb`` is the debugger class.\n\n The *completekey*, *stdin* and *stdout* arguments are passed to the\n underlying ``cmd.Cmd`` class; see the description there.\n\n The *skip* argument, if given, must be an iterable of glob-style\n module name patterns. The debugger will not step into frames that\n originate in a module that matches one of these patterns. [1]\n\n By default, Pdb sets a handler for the SIGINT signal (which is sent\n when the user presses Ctrl-C on the console) when you give a\n ``continue`` command. This allows you to break into the debugger\n again by pressing Ctrl-C. If you want Pdb not to touch the SIGINT\n handler, set *nosigint* to true.\n\n Example call to enable tracing with *skip*:\n\n import pdb; pdb.Pdb(skip=[\'django.*\']).set_trace()\n\n New in version 3.1: The *skip* argument.\n\n New in version 3.2: The *nosigint* argument. Previously, a SIGINT\n handler was never set by Pdb.\n\n run(statement, globals=None, locals=None)\n runeval(expression, globals=None, locals=None)\n runcall(function, *args, **kwds)\n set_trace()\n\n See the documentation for the functions explained above.\n\n\nDebugger Commands\n=================\n\nThe commands recognized by the debugger are listed below. 
Most\ncommands can be abbreviated to one or two letters as indicated; e.g.\n``h(elp)`` means that either ``h`` or ``help`` can be used to enter\nthe help command (but not ``he`` or ``hel``, nor ``H`` or ``Help`` or\n``HELP``). Arguments to commands must be separated by whitespace\n(spaces or tabs). Optional arguments are enclosed in square brackets\n(``[]``) in the command syntax; the square brackets must not be typed.\nAlternatives in the command syntax are separated by a vertical bar\n(``|``).\n\nEntering a blank line repeats the last command entered. Exception: if\nthe last command was a ``list`` command, the next 11 lines are listed.\n\nCommands that the debugger doesn\'t recognize are assumed to be Python\nstatements and are executed in the context of the program being\ndebugged. Python statements can also be prefixed with an exclamation\npoint (``!``). This is a powerful way to inspect the program being\ndebugged; it is even possible to change a variable or call a function.\nWhen an exception occurs in such a statement, the exception name is\nprinted but the debugger\'s state is not changed.\n\nThe debugger supports *aliases*. Aliases can have parameters which\nallows one a certain level of adaptability to the context under\nexamination.\n\nMultiple commands may be entered on a single line, separated by\n``;;``. (A single ``;`` is not used as it is the separator for\nmultiple commands in a line that is passed to the Python parser.) No\nintelligence is applied to separating the commands; the input is split\nat the first ``;;`` pair, even if it is in the middle of a quoted\nstring.\n\nIf a file ``.pdbrc`` exists in the user\'s home directory or in the\ncurrent directory, it is read in and executed as if it had been typed\nat the debugger prompt. This is particularly useful for aliases. 
If\nboth files exist, the one in the home directory is read first and\naliases defined there can be overridden by the local file.\n\nChanged in version 3.2: ``.pdbrc`` can now contain commands that\ncontinue debugging, such as ``continue`` or ``next``. Previously,\nthese commands had no effect.\n\nh(elp) [command]\n\n Without argument, print the list of available commands. With a\n *command* as argument, print help about that command. ``help pdb``\n displays the full documentation (the docstring of the ``pdb``\n module). Since the *command* argument must be an identifier,\n ``help exec`` must be entered to get help on the ``!`` command.\n\nw(here)\n\n Print a stack trace, with the most recent frame at the bottom. An\n arrow indicates the current frame, which determines the context of\n most commands.\n\nd(own) [count]\n\n Move the current frame *count* (default one) levels down in the\n stack trace (to a newer frame).\n\nu(p) [count]\n\n Move the current frame *count* (default one) levels up in the stack\n trace (to an older frame).\n\nb(reak) [([filename:]lineno | function) [, condition]]\n\n With a *lineno* argument, set a break there in the current file.\n With a *function* argument, set a break at the first executable\n statement within that function. The line number may be prefixed\n with a filename and a colon, to specify a breakpoint in another\n file (probably one that hasn\'t been loaded yet). The file is\n searched on ``sys.path``. 
Note that each breakpoint is assigned a\n number to which all the other breakpoint commands refer.\n\n If a second argument is present, it is an expression which must\n evaluate to true before the breakpoint is honored.\n\n Without argument, list all breaks, including for each breakpoint,\n the number of times that breakpoint has been hit, the current\n ignore count, and the associated condition if any.\n\ntbreak [([filename:]lineno | function) [, condition]]\n\n Temporary breakpoint, which is removed automatically when it is\n first hit. The arguments are the same as for ``break``.\n\ncl(ear) [filename:lineno | bpnumber [bpnumber ...]]\n\n With a *filename:lineno* argument, clear all the breakpoints at\n this line. With a space separated list of breakpoint numbers, clear\n those breakpoints. Without argument, clear all breaks (but first\n ask confirmation).\n\ndisable [bpnumber [bpnumber ...]]\n\n Disable the breakpoints given as a space separated list of\n breakpoint numbers. Disabling a breakpoint means it cannot cause\n the program to stop execution, but unlike clearing a breakpoint, it\n remains in the list of breakpoints and can be (re-)enabled.\n\nenable [bpnumber [bpnumber ...]]\n\n Enable the breakpoints specified.\n\nignore bpnumber [count]\n\n Set the ignore count for the given breakpoint number. If count is\n omitted, the ignore count is set to 0. A breakpoint becomes active\n when the ignore count is zero. When non-zero, the count is\n decremented each time the breakpoint is reached and the breakpoint\n is not disabled and any associated condition evaluates to true.\n\ncondition bpnumber [condition]\n\n Set a new *condition* for the breakpoint, an expression which must\n evaluate to true before the breakpoint is honored. If *condition*\n is absent, any existing condition is removed; i.e., the breakpoint\n is made unconditional.\n\ncommands [bpnumber]\n\n Specify a list of commands for breakpoint number *bpnumber*. 
The\n commands themselves appear on the following lines. Type a line\n containing just ``end`` to terminate the commands. An example:\n\n (Pdb) commands 1\n (com) print some_variable\n (com) end\n (Pdb)\n\n To remove all commands from a breakpoint, type commands and follow\n it immediately with ``end``; that is, give no commands.\n\n With no *bpnumber* argument, commands refers to the last breakpoint\n set.\n\n You can use breakpoint commands to start your program up again.\n Simply use the continue command, or step, or any other command that\n resumes execution.\n\n Specifying any command resuming execution (currently continue,\n step, next, return, jump, quit and their abbreviations) terminates\n the command list (as if that command was immediately followed by\n end). This is because any time you resume execution (even with a\n simple next or step), you may encounter another breakpoint--which\n could have its own command list, leading to ambiguities about which\n list to execute.\n\n If you use the \'silent\' command in the command list, the usual\n message about stopping at a breakpoint is not printed. This may be\n desirable for breakpoints that are to print a specific message and\n then continue. If none of the other commands print anything, you\n see no sign that the breakpoint was reached.\n\ns(tep)\n\n Execute the current line, stop at the first possible occasion\n (either in a function that is called or on the next line in the\n current function).\n\nn(ext)\n\n Continue execution until the next line in the current function is\n reached or it returns. 
(The difference between ``next`` and\n ``step`` is that ``step`` stops inside a called function, while\n ``next`` executes called functions at (nearly) full speed, only\n stopping at the next line in the current function.)\n\nunt(il) [lineno]\n\n Without argument, continue execution until the line with a number\n greater than the current one is reached.\n\n With a line number, continue execution until a line with a number\n greater than or equal to that is reached. In both cases, also stop when\n the current frame returns.\n\n Changed in version 3.2: Allow giving an explicit line number.\n\nr(eturn)\n\n Continue execution until the current function returns.\n\nc(ont(inue))\n\n Continue execution, only stop when a breakpoint is encountered.\n\nj(ump) lineno\n\n Set the next line that will be executed. Only available in the\n bottom-most frame. This lets you jump back and execute code again,\n or jump forward to skip code that you don\'t want to run.\n\n It should be noted that not all jumps are allowed -- for instance\n it is not possible to jump into the middle of a ``for`` loop or out\n of a ``finally`` clause.\n\nl(ist) [first[, last]]\n\n List source code for the current file. Without arguments, list 11\n lines around the current line or continue the previous listing.\n With ``.`` as argument, list 11 lines around the current line.\n With one argument, list 11 lines around that line. With two\n arguments, list the given range; if the second argument is less\n than the first, it is interpreted as a count.\n\n The current line in the current frame is indicated by ``->``. 
If\n an exception is being debugged, the line where the exception was\n originally raised or propagated is indicated by ``>>``, if it\n differs from the current line.\n\n New in version 3.2: The ``>>`` marker.\n\nll | longlist\n\n List all source code for the current function or frame.\n Interesting lines are marked as for ``list``.\n\n New in version 3.2.\n\na(rgs)\n\n Print the argument list of the current function.\n\np(rint) expression\n\n Evaluate the *expression* in the current context and print its\n value.\n\npp expression\n\n Like the ``print`` command, except the value of the expression is\n pretty-printed using the ``pprint`` module.\n\nwhatis expression\n\n Print the type of the *expression*.\n\nsource expression\n\n Try to get source code for the given object and display it.\n\n New in version 3.2.\n\ndisplay [expression]\n\n Display the value of the expression if it changed, each time\n execution stops in the current frame.\n\n Without expression, list all display expressions for the current\n frame.\n\n New in version 3.2.\n\nundisplay [expression]\n\n Do not display the expression any more in the current frame.\n Without expression, clear all display expressions for the current\n frame.\n\n New in version 3.2.\n\ninteract\n\n Start an interactive interpreter (using the ``code`` module) whose\n global namespace contains all the (global and local) names found in\n the current scope.\n\n New in version 3.2.\n\nalias [name [command]]\n\n Create an alias called *name* that executes *command*. The command\n must *not* be enclosed in quotes. Replaceable parameters can be\n indicated by ``%1``, ``%2``, and so on, while ``%*`` is replaced by\n all the parameters. If no command is given, the current alias for\n *name* is shown. If no arguments are given, all aliases are listed.\n\n Aliases may be nested and can contain anything that can be legally\n typed at the pdb prompt. Note that internal pdb commands *can* be\n overridden by aliases. 
Such a command is then hidden until the\n alias is removed. Aliasing is recursively applied to the first\n word of the command line; all other words in the line are left\n alone.\n\n As an example, here are two useful aliases (especially when placed\n in the ``.pdbrc`` file):\n\n # Print instance variables (usage "pi classInst")\n alias pi for k in %1.__dict__.keys(): print("%1.",k,"=",%1.__dict__[k])\n # Print instance variables in self\n alias ps pi self\n\nunalias name\n\n Delete the specified alias.\n\n! statement\n\n Execute the (one-line) *statement* in the context of the current\n stack frame. The exclamation point can be omitted unless the first\n word of the statement resembles a debugger command. To set a\n global variable, you can prefix the assignment command with a\n ``global`` statement on the same line, e.g.:\n\n (Pdb) global list_options; list_options = [\'-l\']\n (Pdb)\n\nrun [args ...]\nrestart [args ...]\n\n Restart the debugged Python program. If an argument is supplied,\n it is split with ``shlex`` and the result is used as the new\n ``sys.argv``. History, breakpoints, actions and debugger options\n are preserved. ``restart`` is an alias for ``run``.\n\nq(uit)\n\n Quit from the debugger. The program being executed is aborted.\n\n-[ Footnotes ]-\n\n[1] Whether a frame is considered to originate in a certain module is\n determined by the ``__name__`` in the frame globals.\n',
- 'del': '\nThe ``del`` statement\n*********************\n\n del_stmt ::= "del" target_list\n\nDeletion is recursively defined very similar to the way assignment is\ndefined. Rather than spelling it out in full details, here are some\nhints.\n\nDeletion of a target list recursively deletes each target, from left\nto right.\n\nDeletion of a name removes the binding of that name from the local or\nglobal namespace, depending on whether the name occurs in a ``global``\nstatement in the same code block. If the name is unbound, a\n``NameError`` exception will be raised.\n\nDeletion of attribute references, subscriptions and slicings is passed\nto the primary object involved; deletion of a slicing is in general\nequivalent to assignment of an empty slice of the right type (but even\nthis is determined by the sliced object).\n\nChanged in version 3.2.\n',
+ 'customization': '\nBasic customization\n*******************\n\nobject.__new__(cls[, ...])\n\n Called to create a new instance of class *cls*. ``__new__()`` is a\n static method (special-cased so you need not declare it as such)\n that takes the class of which an instance was requested as its\n first argument. The remaining arguments are those passed to the\n object constructor expression (the call to the class). The return\n value of ``__new__()`` should be the new object instance (usually\n an instance of *cls*).\n\n Typical implementations create a new instance of the class by\n invoking the superclass\'s ``__new__()`` method using\n ``super(currentclass, cls).__new__(cls[, ...])`` with appropriate\n arguments and then modifying the newly-created instance as\n necessary before returning it.\n\n If ``__new__()`` returns an instance of *cls*, then the new\n instance\'s ``__init__()`` method will be invoked like\n ``__init__(self[, ...])``, where *self* is the new instance and the\n remaining arguments are the same as were passed to ``__new__()``.\n\n If ``__new__()`` does not return an instance of *cls*, then the new\n instance\'s ``__init__()`` method will not be invoked.\n\n ``__new__()`` is intended mainly to allow subclasses of immutable\n types (like int, str, or tuple) to customize instance creation. It\n is also commonly overridden in custom metaclasses in order to\n customize class creation.\n\nobject.__init__(self[, ...])\n\n Called when the instance is created. The arguments are those\n passed to the class constructor expression. If a base class has an\n ``__init__()`` method, the derived class\'s ``__init__()`` method,\n if any, must explicitly call it to ensure proper initialization of\n the base class part of the instance; for example:\n ``BaseClass.__init__(self, [args...])``. 
As a special constraint\n on constructors, no value may be returned; doing so will cause a\n ``TypeError`` to be raised at runtime.\n\nobject.__del__(self)\n\n Called when the instance is about to be destroyed. This is also\n called a destructor. If a base class has a ``__del__()`` method,\n the derived class\'s ``__del__()`` method, if any, must explicitly\n call it to ensure proper deletion of the base class part of the\n instance. Note that it is possible (though not recommended!) for\n the ``__del__()`` method to postpone destruction of the instance by\n creating a new reference to it. It may then be called at a later\n time when this new reference is deleted. It is not guaranteed that\n ``__del__()`` methods are called for objects that still exist when\n the interpreter exits.\n\n Note: ``del x`` doesn\'t directly call ``x.__del__()`` --- the former\n decrements the reference count for ``x`` by one, and the latter\n is only called when ``x``\'s reference count reaches zero. Some\n common situations that may prevent the reference count of an\n object from going to zero include: circular references between\n objects (e.g., a doubly-linked list or a tree data structure with\n parent and child pointers); a reference to the object on the\n stack frame of a function that caught an exception (the traceback\n stored in ``sys.exc_info()[2]`` keeps the stack frame alive); or\n a reference to the object on the stack frame that raised an\n unhandled exception in interactive mode (the traceback stored in\n ``sys.last_traceback`` keeps the stack frame alive). The first\n situation can only be remedied by explicitly breaking the cycles;\n the latter two situations can be resolved by storing ``None`` in\n ``sys.last_traceback``. Circular references which are garbage are\n detected when the optional cycle detector is enabled (it\'s on by\n default), but can only be cleaned up if there are no Python-\n level ``__del__()`` methods involved. 
Refer to the documentation\n for the ``gc`` module for more information about how\n ``__del__()`` methods are handled by the cycle detector,\n particularly the description of the ``garbage`` value.\n\n Warning: Due to the precarious circumstances under which ``__del__()``\n methods are invoked, exceptions that occur during their execution\n are ignored, and a warning is printed to ``sys.stderr`` instead.\n Also, when ``__del__()`` is invoked in response to a module being\n deleted (e.g., when execution of the program is done), other\n globals referenced by the ``__del__()`` method may already have\n been deleted or in the process of being torn down (e.g. the\n import machinery shutting down). For this reason, ``__del__()``\n methods should do the absolute minimum needed to maintain\n external invariants. Starting with version 1.5, Python\n guarantees that globals whose name begins with a single\n underscore are deleted from their module before other globals are\n deleted; if no other references to such globals exist, this may\n help in assuring that imported modules are still available at the\n time when the ``__del__()`` method is called.\n\nobject.__repr__(self)\n\n Called by the ``repr()`` built-in function to compute the\n "official" string representation of an object. If at all possible,\n this should look like a valid Python expression that could be used\n to recreate an object with the same value (given an appropriate\n environment). If this is not possible, a string of the form\n ``<...some useful description...>`` should be returned. The return\n value must be a string object. 
If a class defines ``__repr__()``\n but not ``__str__()``, then ``__repr__()`` is also used when an\n "informal" string representation of instances of that class is\n required.\n\n This is typically used for debugging, so it is important that the\n representation is information-rich and unambiguous.\n\nobject.__str__(self)\n\n Called by the ``str()`` built-in function and by the ``print()``\n function to compute the "informal" string representation of an\n object. This differs from ``__repr__()`` in that it does not have\n to be a valid Python expression: a more convenient or concise\n representation may be used instead. The return value must be a\n string object.\n\nobject.__bytes__(self)\n\n Called by ``bytes()`` to compute a byte-string representation of an\n object. This should return a ``bytes`` object.\n\nobject.__format__(self, format_spec)\n\n Called by the ``format()`` built-in function (and by extension, the\n ``format()`` method of class ``str``) to produce a "formatted"\n string representation of an object. The ``format_spec`` argument is\n a string that contains a description of the formatting options\n desired. The interpretation of the ``format_spec`` argument is up\n to the type implementing ``__format__()``, however most classes\n will either delegate formatting to one of the built-in types, or\n use a similar formatting option syntax.\n\n See *Format Specification Mini-Language* for a description of the\n standard formatting syntax.\n\n The return value must be a string object.\n\nobject.__lt__(self, other)\nobject.__le__(self, other)\nobject.__eq__(self, other)\nobject.__ne__(self, other)\nobject.__gt__(self, other)\nobject.__ge__(self, other)\n\n These are the so-called "rich comparison" methods. 
The\n correspondence between operator symbols and method names is as\n follows: ``x<y`` calls ``x.__lt__(y)``, ``x<=y`` calls\n ``x.__le__(y)``, ``x==y`` calls ``x.__eq__(y)``, ``x!=y`` calls\n ``x.__ne__(y)``, ``x>y`` calls ``x.__gt__(y)``, and ``x>=y`` calls\n ``x.__ge__(y)``.\n\n A rich comparison method may return the singleton\n ``NotImplemented`` if it does not implement the operation for a\n given pair of arguments. By convention, ``False`` and ``True`` are\n returned for a successful comparison. However, these methods can\n return any value, so if the comparison operator is used in a\n Boolean context (e.g., in the condition of an ``if`` statement),\n Python will call ``bool()`` on the value to determine if the result\n is true or false.\n\n There are no implied relationships among the comparison operators.\n The truth of ``x==y`` does not imply that ``x!=y`` is false.\n Accordingly, when defining ``__eq__()``, one should also define\n ``__ne__()`` so that the operators will behave as expected. See\n the paragraph on ``__hash__()`` for some important notes on\n creating *hashable* objects which support custom comparison\n operations and are usable as dictionary keys.\n\n There are no swapped-argument versions of these methods (to be used\n when the left argument does not support the operation but the right\n argument does); rather, ``__lt__()`` and ``__gt__()`` are each\n other\'s reflection, ``__le__()`` and ``__ge__()`` are each other\'s\n reflection, and ``__eq__()`` and ``__ne__()`` are their own\n reflection.\n\n Arguments to rich comparison methods are never coerced.\n\n To automatically generate ordering operations from a single root\n operation, see ``functools.total_ordering()``.\n\nobject.__hash__(self)\n\n Called by built-in function ``hash()`` and for operations on\n members of hashed collections including ``set``, ``frozenset``, and\n ``dict``. ``__hash__()`` should return an integer. 
The only\n required property is that objects which compare equal have the same\n hash value; it is advised to somehow mix together (e.g. using\n exclusive or) the hash values for the components of the object that\n also play a part in comparison of objects.\n\n If a class does not define an ``__eq__()`` method it should not\n define a ``__hash__()`` operation either; if it defines\n ``__eq__()`` but not ``__hash__()``, its instances will not be\n usable as items in hashable collections. If a class defines\n mutable objects and implements an ``__eq__()`` method, it should\n not implement ``__hash__()``, since the implementation of hashable\n collections requires that a key\'s hash value is immutable (if the\n object\'s hash value changes, it will be in the wrong hash bucket).\n\n User-defined classes have ``__eq__()`` and ``__hash__()`` methods\n by default; with them, all objects compare unequal (except with\n themselves) and ``x.__hash__()`` returns an appropriate value such\n that ``x == y`` implies both that ``x is y`` and ``hash(x) ==\n hash(y)``.\n\n Classes which inherit a ``__hash__()`` method from a parent class\n but change the meaning of ``__eq__()`` such that the hash value\n returned is no longer appropriate (e.g. by switching to a value-\n based concept of equality instead of the default identity based\n equality) can explicitly flag themselves as being unhashable by\n setting ``__hash__ = None`` in the class definition. 
Doing so means\n that not only will instances of the class raise an appropriate\n ``TypeError`` when a program attempts to retrieve their hash value,\n but they will also be correctly identified as unhashable when\n checking ``isinstance(obj, collections.Hashable)`` (unlike classes\n which define their own ``__hash__()`` to explicitly raise\n ``TypeError``).\n\n If a class that overrides ``__eq__()`` needs to retain the\n implementation of ``__hash__()`` from a parent class, the\n interpreter must be told this explicitly by setting ``__hash__ =\n <ParentClass>.__hash__``. Otherwise the inheritance of\n ``__hash__()`` will be blocked, just as if ``__hash__`` had been\n explicitly set to ``None``.\n\n Note: By default, the ``__hash__()`` values of str, bytes and datetime\n objects are "salted" with an unpredictable random value.\n Although they remain constant within an individual Python\n process, they are not predictable between repeated invocations of\n Python. This is intended to provide protection against a denial-\n of-service caused by carefully-chosen inputs that exploit the\n worst case performance of a dict insertion, O(n^2) complexity.\n See http://www.ocert.org/advisories/ocert-2011-003.html for\n details. Changing hash values affects the iteration order of\n dicts, sets and other mappings. Python has never made guarantees\n about this ordering (and it typically varies between 32-bit and\n 64-bit builds). See also ``PYTHONHASHSEED``.\n\n Changed in version 3.3: Hash randomization is enabled by default.\n\nobject.__bool__(self)\n\n Called to implement truth value testing and the built-in operation\n ``bool()``; should return ``False`` or ``True``. When this method\n is not defined, ``__len__()`` is called, if it is defined, and the\n object is considered true if its result is nonzero. If a class\n defines neither ``__len__()`` nor ``__bool__()``, all its instances\n are considered true.\n',
+ 'debugger': '\n``pdb`` --- The Python Debugger\n*******************************\n\nThe module ``pdb`` defines an interactive source code debugger for\nPython programs. It supports setting (conditional) breakpoints and\nsingle stepping at the source line level, inspection of stack frames,\nsource code listing, and evaluation of arbitrary Python code in the\ncontext of any stack frame. It also supports post-mortem debugging\nand can be called under program control.\n\nThe debugger is extensible -- it is actually defined as the class\n``Pdb``. This is currently undocumented but easily understood by\nreading the source. The extension interface uses the modules ``bdb``\nand ``cmd``.\n\nThe debugger\'s prompt is ``(Pdb)``. Typical usage to run a program\nunder control of the debugger is:\n\n >>> import pdb\n >>> import mymodule\n >>> pdb.run(\'mymodule.test()\')\n > <string>(0)?()\n (Pdb) continue\n > <string>(1)?()\n (Pdb) continue\n NameError: \'spam\'\n > <string>(1)?()\n (Pdb)\n\nChanged in version 3.3: Tab-completion via the ``readline`` module is\navailable for commands and command arguments, e.g. the current global\nand local names are offered as arguments of the ``print`` command.\n\n``pdb.py`` can also be invoked as a script to debug other scripts.\nFor example:\n\n python3 -m pdb myscript.py\n\nWhen invoked as a script, pdb will automatically enter post-mortem\ndebugging if the program being debugged exits abnormally. After post-\nmortem debugging (or after normal exit of the program), pdb will\nrestart the program. 
Automatic restarting preserves pdb\'s state (such\nas breakpoints) and in most cases is more useful than quitting the\ndebugger upon program\'s exit.\n\nNew in version 3.2: ``pdb.py`` now accepts a ``-c`` option that\nexecutes commands as if given in a ``.pdbrc`` file, see *Debugger\nCommands*.\n\nThe typical usage to break into the debugger from a running program is\nto insert\n\n import pdb; pdb.set_trace()\n\nat the location you want to break into the debugger. You can then\nstep through the code following this statement, and continue running\nwithout the debugger using the ``continue`` command.\n\nThe typical usage to inspect a crashed program is:\n\n >>> import pdb\n >>> import mymodule\n >>> mymodule.test()\n Traceback (most recent call last):\n File "<stdin>", line 1, in ?\n File "./mymodule.py", line 4, in test\n test2()\n File "./mymodule.py", line 3, in test2\n print(spam)\n NameError: spam\n >>> pdb.pm()\n > ./mymodule.py(3)test2()\n -> print(spam)\n (Pdb)\n\nThe module defines the following functions; each enters the debugger\nin a slightly different way:\n\npdb.run(statement, globals=None, locals=None)\n\n Execute the *statement* (given as a string or a code object) under\n debugger control. The debugger prompt appears before any code is\n executed; you can set breakpoints and type ``continue``, or you can\n step through the statement using ``step`` or ``next`` (all these\n commands are explained below). The optional *globals* and *locals*\n arguments specify the environment in which the code is executed; by\n default the dictionary of the module ``__main__`` is used. (See\n the explanation of the built-in ``exec()`` or ``eval()``\n functions.)\n\npdb.runeval(expression, globals=None, locals=None)\n\n Evaluate the *expression* (given as a string or a code object)\n under debugger control. When ``runeval()`` returns, it returns the\n value of the expression. 
Otherwise this function is similar to\n ``run()``.\n\npdb.runcall(function, *args, **kwds)\n\n Call the *function* (a function or method object, not a string)\n with the given arguments. When ``runcall()`` returns, it returns\n whatever the function call returned. The debugger prompt appears\n as soon as the function is entered.\n\npdb.set_trace()\n\n Enter the debugger at the calling stack frame. This is useful to\n hard-code a breakpoint at a given point in a program, even if the\n code is not otherwise being debugged (e.g. when an assertion\n fails).\n\npdb.post_mortem(traceback=None)\n\n Enter post-mortem debugging of the given *traceback* object. If no\n *traceback* is given, it uses the one of the exception that is\n currently being handled (an exception must be being handled if the\n default is to be used).\n\npdb.pm()\n\n Enter post-mortem debugging of the traceback found in\n ``sys.last_traceback``.\n\nThe ``run*`` functions and ``set_trace()`` are aliases for\ninstantiating the ``Pdb`` class and calling the method of the same\nname. If you want to access further features, you have to do this\nyourself:\n\nclass pdb.Pdb(completekey=\'tab\', stdin=None, stdout=None, skip=None, nosigint=False)\n\n ``Pdb`` is the debugger class.\n\n The *completekey*, *stdin* and *stdout* arguments are passed to the\n underlying ``cmd.Cmd`` class; see the description there.\n\n The *skip* argument, if given, must be an iterable of glob-style\n module name patterns. The debugger will not step into frames that\n originate in a module that matches one of these patterns. [1]\n\n By default, Pdb sets a handler for the SIGINT signal (which is sent\n when the user presses Ctrl-C on the console) when you give a\n ``continue`` command. This allows you to break into the debugger\n again by pressing Ctrl-C. 
If you want Pdb not to touch the SIGINT\n handler, set *nosigint* to true.\n\n Example call to enable tracing with *skip*:\n\n import pdb; pdb.Pdb(skip=[\'django.*\']).set_trace()\n\n New in version 3.1: The *skip* argument.\n\n New in version 3.2: The *nosigint* argument. Previously, a SIGINT\n handler was never set by Pdb.\n\n run(statement, globals=None, locals=None)\n runeval(expression, globals=None, locals=None)\n runcall(function, *args, **kwds)\n set_trace()\n\n See the documentation for the functions explained above.\n\n\nDebugger Commands\n=================\n\nThe commands recognized by the debugger are listed below. Most\ncommands can be abbreviated to one or two letters as indicated; e.g.\n``h(elp)`` means that either ``h`` or ``help`` can be used to enter\nthe help command (but not ``he`` or ``hel``, nor ``H`` or ``Help`` or\n``HELP``). Arguments to commands must be separated by whitespace\n(spaces or tabs). Optional arguments are enclosed in square brackets\n(``[]``) in the command syntax; the square brackets must not be typed.\nAlternatives in the command syntax are separated by a vertical bar\n(``|``).\n\nEntering a blank line repeats the last command entered. Exception: if\nthe last command was a ``list`` command, the next 11 lines are listed.\n\nCommands that the debugger doesn\'t recognize are assumed to be Python\nstatements and are executed in the context of the program being\ndebugged. Python statements can also be prefixed with an exclamation\npoint (``!``). This is a powerful way to inspect the program being\ndebugged; it is even possible to change a variable or call a function.\nWhen an exception occurs in such a statement, the exception name is\nprinted but the debugger\'s state is not changed.\n\nThe debugger supports *aliases*. Aliases can have parameters which\nallows one a certain level of adaptability to the context under\nexamination.\n\nMultiple commands may be entered on a single line, separated by\n``;;``. 
(A single ``;`` is not used as it is the separator for\nmultiple commands in a line that is passed to the Python parser.) No\nintelligence is applied to separating the commands; the input is split\nat the first ``;;`` pair, even if it is in the middle of a quoted\nstring.\n\nIf a file ``.pdbrc`` exists in the user\'s home directory or in the\ncurrent directory, it is read in and executed as if it had been typed\nat the debugger prompt. This is particularly useful for aliases. If\nboth files exist, the one in the home directory is read first and\naliases defined there can be overridden by the local file.\n\nChanged in version 3.2: ``.pdbrc`` can now contain commands that\ncontinue debugging, such as ``continue`` or ``next``. Previously,\nthese commands had no effect.\n\nh(elp) [command]\n\n Without argument, print the list of available commands. With a\n *command* as argument, print help about that command. ``help pdb``\n displays the full documentation (the docstring of the ``pdb``\n module). Since the *command* argument must be an identifier,\n ``help exec`` must be entered to get help on the ``!`` command.\n\nw(here)\n\n Print a stack trace, with the most recent frame at the bottom. An\n arrow indicates the current frame, which determines the context of\n most commands.\n\nd(own) [count]\n\n Move the current frame *count* (default one) levels down in the\n stack trace (to a newer frame).\n\nu(p) [count]\n\n Move the current frame *count* (default one) levels up in the stack\n trace (to an older frame).\n\nb(reak) [([filename:]lineno | function) [, condition]]\n\n With a *lineno* argument, set a break there in the current file.\n With a *function* argument, set a break at the first executable\n statement within that function. The line number may be prefixed\n with a filename and a colon, to specify a breakpoint in another\n file (probably one that hasn\'t been loaded yet). The file is\n searched on ``sys.path``. 
Note that each breakpoint is assigned a\n number to which all the other breakpoint commands refer.\n\n If a second argument is present, it is an expression which must\n evaluate to true before the breakpoint is honored.\n\n Without argument, list all breaks, including for each breakpoint,\n the number of times that breakpoint has been hit, the current\n ignore count, and the associated condition if any.\n\ntbreak [([filename:]lineno | function) [, condition]]\n\n Temporary breakpoint, which is removed automatically when it is\n first hit. The arguments are the same as for ``break``.\n\ncl(ear) [filename:lineno | bpnumber [bpnumber ...]]\n\n With a *filename:lineno* argument, clear all the breakpoints at\n this line. With a space separated list of breakpoint numbers, clear\n those breakpoints. Without argument, clear all breaks (but first\n ask confirmation).\n\ndisable [bpnumber [bpnumber ...]]\n\n Disable the breakpoints given as a space separated list of\n breakpoint numbers. Disabling a breakpoint means it cannot cause\n the program to stop execution, but unlike clearing a breakpoint, it\n remains in the list of breakpoints and can be (re-)enabled.\n\nenable [bpnumber [bpnumber ...]]\n\n Enable the breakpoints specified.\n\nignore bpnumber [count]\n\n Set the ignore count for the given breakpoint number. If count is\n omitted, the ignore count is set to 0. A breakpoint becomes active\n when the ignore count is zero. When non-zero, the count is\n decremented each time the breakpoint is reached and the breakpoint\n is not disabled and any associated condition evaluates to true.\n\ncondition bpnumber [condition]\n\n Set a new *condition* for the breakpoint, an expression which must\n evaluate to true before the breakpoint is honored. If *condition*\n is absent, any existing condition is removed; i.e., the breakpoint\n is made unconditional.\n\ncommands [bpnumber]\n\n Specify a list of commands for breakpoint number *bpnumber*. 
The\n commands themselves appear on the following lines. Type a line\n containing just ``end`` to terminate the commands. An example:\n\n (Pdb) commands 1\n (com) print some_variable\n (com) end\n (Pdb)\n\n To remove all commands from a breakpoint, type commands and follow\n it immediately with ``end``; that is, give no commands.\n\n With no *bpnumber* argument, commands refers to the last breakpoint\n set.\n\n You can use breakpoint commands to start your program up again.\n Simply use the continue command, or step, or any other command that\n resumes execution.\n\n Specifying any command resuming execution (currently continue,\n step, next, return, jump, quit and their abbreviations) terminates\n the command list (as if that command was immediately followed by\n end). This is because any time you resume execution (even with a\n simple next or step), you may encounter another breakpoint--which\n could have its own command list, leading to ambiguities about which\n list to execute.\n\n If you use the \'silent\' command in the command list, the usual\n message about stopping at a breakpoint is not printed. This may be\n desirable for breakpoints that are to print a specific message and\n then continue. If none of the other commands print anything, you\n see no sign that the breakpoint was reached.\n\ns(tep)\n\n Execute the current line, stop at the first possible occasion\n (either in a function that is called or on the next line in the\n current function).\n\nn(ext)\n\n Continue execution until the next line in the current function is\n reached or it returns. 
(The difference between ``next`` and\n ``step`` is that ``step`` stops inside a called function, while\n ``next`` executes called functions at (nearly) full speed, only\n stopping at the next line in the current function.)\n\nunt(il) [lineno]\n\n Without argument, continue execution until the line with a number\n greater than the current one is reached.\n\n With a line number, continue execution until a line with a number\n greater or equal to that is reached. In both cases, also stop when\n the current frame returns.\n\n Changed in version 3.2: Allow giving an explicit line number.\n\nr(eturn)\n\n Continue execution until the current function returns.\n\nc(ont(inue))\n\n Continue execution, only stop when a breakpoint is encountered.\n\nj(ump) lineno\n\n Set the next line that will be executed. Only available in the\n bottom-most frame. This lets you jump back and execute code again,\n or jump forward to skip code that you don\'t want to run.\n\n It should be noted that not all jumps are allowed -- for instance\n it is not possible to jump into the middle of a ``for`` loop or out\n of a ``finally`` clause.\n\nl(ist) [first[, last]]\n\n List source code for the current file. Without arguments, list 11\n lines around the current line or continue the previous listing.\n With ``.`` as argument, list 11 lines around the current line.\n With one argument, list 11 lines around at that line. With two\n arguments, list the given range; if the second argument is less\n than the first, it is interpreted as a count.\n\n The current line in the current frame is indicated by ``->``. 
If\n an exception is being debugged, the line where the exception was\n originally raised or propagated is indicated by ``>>``, if it\n differs from the current line.\n\n New in version 3.2: The ``>>`` marker.\n\nll | longlist\n\n List all source code for the current function or frame.\n Interesting lines are marked as for ``list``.\n\n New in version 3.2.\n\na(rgs)\n\n Print the argument list of the current function.\n\np(rint) expression\n\n Evaluate the *expression* in the current context and print its\n value.\n\npp expression\n\n Like the ``print`` command, except the value of the expression is\n pretty-printed using the ``pprint`` module.\n\nwhatis expression\n\n Print the type of the *expression*.\n\nsource expression\n\n Try to get source code for the given object and display it.\n\n New in version 3.2.\n\ndisplay [expression]\n\n Display the value of the expression if it changed, each time\n execution stops in the current frame.\n\n Without expression, list all display expressions for the current\n frame.\n\n New in version 3.2.\n\nundisplay [expression]\n\n Do not display the expression any more in the current frame.\n Without expression, clear all display expressions for the current\n frame.\n\n New in version 3.2.\n\ninteract\n\n Start an interactive interpreter (using the ``code`` module) whose\n global namespace contains all the (global and local) names found in\n the current scope.\n\n New in version 3.2.\n\nalias [name [command]]\n\n Create an alias called *name* that executes *command*. The command\n must *not* be enclosed in quotes. Replaceable parameters can be\n indicated by ``%1``, ``%2``, and so on, while ``%*`` is replaced by\n all the parameters. If no command is given, the current alias for\n *name* is shown. If no arguments are given, all aliases are listed.\n\n Aliases may be nested and can contain anything that can be legally\n typed at the pdb prompt. Note that internal pdb commands *can* be\n overridden by aliases. 
Such a command is then hidden until the\n alias is removed. Aliasing is recursively applied to the first\n word of the command line; all other words in the line are left\n alone.\n\n As an example, here are two useful aliases (especially when placed\n in the ``.pdbrc`` file):\n\n # Print instance variables (usage "pi classInst")\n alias pi for k in %1.__dict__.keys(): print("%1.",k,"=",%1.__dict__[k])\n # Print instance variables in self\n alias ps pi self\n\nunalias name\n\n Delete the specified alias.\n\n! statement\n\n Execute the (one-line) *statement* in the context of the current\n stack frame. The exclamation point can be omitted unless the first\n word of the statement resembles a debugger command. To set a\n global variable, you can prefix the assignment command with a\n ``global`` statement on the same line, e.g.:\n\n (Pdb) global list_options; list_options = [\'-l\']\n (Pdb)\n\nrun [args ...]\nrestart [args ...]\n\n Restart the debugged Python program. If an argument is supplied,\n it is split with ``shlex`` and the result is used as the new\n ``sys.argv``. History, breakpoints, actions and debugger options\n are preserved. ``restart`` is an alias for ``run``.\n\nq(uit)\n\n Quit from the debugger. The program being executed is aborted.\n\n-[ Footnotes ]-\n\n[1] Whether a frame is considered to originate in a certain module is\n determined by the ``__name__`` in the frame globals.\n',
+ 'del': '\nThe ``del`` statement\n*********************\n\n del_stmt ::= "del" target_list\n\nDeletion is recursively defined very similar to the way assignment is\ndefined. Rather than spelling it out in full details, here are some\nhints.\n\nDeletion of a target list recursively deletes each target, from left\nto right.\n\nDeletion of a name removes the binding of that name from the local or\nglobal namespace, depending on whether the name occurs in a ``global``\nstatement in the same code block. If the name is unbound, a\n``NameError`` exception will be raised.\n\nDeletion of attribute references, subscriptions and slicings is passed\nto the primary object involved; deletion of a slicing is in general\nequivalent to assignment of an empty slice of the right type (but even\nthis is determined by the sliced object).\n\nChanged in version 3.2: Previously it was illegal to delete a name\nfrom the local namespace if it occurs as a free variable in a nested\nblock.\n',
'dict': '\nDictionary displays\n*******************\n\nA dictionary display is a possibly empty series of key/datum pairs\nenclosed in curly braces:\n\n dict_display ::= "{" [key_datum_list | dict_comprehension] "}"\n key_datum_list ::= key_datum ("," key_datum)* [","]\n key_datum ::= expression ":" expression\n dict_comprehension ::= expression ":" expression comp_for\n\nA dictionary display yields a new dictionary object.\n\nIf a comma-separated sequence of key/datum pairs is given, they are\nevaluated from left to right to define the entries of the dictionary:\neach key object is used as a key into the dictionary to store the\ncorresponding datum. This means that you can specify the same key\nmultiple times in the key/datum list, and the final dictionary\'s value\nfor that key will be the last one given.\n\nA dict comprehension, in contrast to list and set comprehensions,\nneeds two expressions separated with a colon followed by the usual\n"for" and "if" clauses. When the comprehension is run, the resulting\nkey and value elements are inserted in the new dictionary in the order\nthey are produced.\n\nRestrictions on the types of the key values are listed earlier in\nsection *The standard type hierarchy*. (To summarize, the key type\nshould be *hashable*, which excludes all mutable objects.) Clashes\nbetween duplicate keys are not detected; the last datum (textually\nrightmost in the display) stored for a given key value prevails.\n',
'dynamic-features': '\nInteraction with dynamic features\n*********************************\n\nThere are several cases where Python statements are illegal when used\nin conjunction with nested scopes that contain free variables.\n\nIf a variable is referenced in an enclosing scope, it is illegal to\ndelete the name. An error will be reported at compile time.\n\nIf the wild card form of import --- ``import *`` --- is used in a\nfunction and the function contains or is a nested block with free\nvariables, the compiler will raise a ``SyntaxError``.\n\nThe ``eval()`` and ``exec()`` functions do not have access to the full\nenvironment for resolving names. Names may be resolved in the local\nand global namespaces of the caller. Free variables are not resolved\nin the nearest enclosing namespace, but in the global namespace. [1]\nThe ``exec()`` and ``eval()`` functions have optional arguments to\noverride the global and local namespace. If only one namespace is\nspecified, it is used for both.\n',
'else': '\nThe ``if`` statement\n********************\n\nThe ``if`` statement is used for conditional execution:\n\n if_stmt ::= "if" expression ":" suite\n ( "elif" expression ":" suite )*\n ["else" ":" suite]\n\nIt selects exactly one of the suites by evaluating the expressions one\nby one until one is found to be true (see section *Boolean operations*\nfor the definition of true and false); then that suite is executed\n(and no other part of the ``if`` statement is executed or evaluated).\nIf all expressions are false, the suite of the ``else`` clause, if\npresent, is executed.\n',
@@ -33,14 +34,14 @@ topics = {'assert': '\nThe ``assert`` statement\n************************\n\nAss
'exprlists': '\nExpression lists\n****************\n\n expression_list ::= expression ( "," expression )* [","]\n\nAn expression list containing at least one comma yields a tuple. The\nlength of the tuple is the number of expressions in the list. The\nexpressions are evaluated from left to right.\n\nThe trailing comma is required only to create a single tuple (a.k.a. a\n*singleton*); it is optional in all other cases. A single expression\nwithout a trailing comma doesn\'t create a tuple, but rather yields the\nvalue of that expression. (To create an empty tuple, use an empty pair\nof parentheses: ``()``.)\n',
'floating': '\nFloating point literals\n***********************\n\nFloating point literals are described by the following lexical\ndefinitions:\n\n floatnumber ::= pointfloat | exponentfloat\n pointfloat ::= [intpart] fraction | intpart "."\n exponentfloat ::= (intpart | pointfloat) exponent\n intpart ::= digit+\n fraction ::= "." digit+\n exponent ::= ("e" | "E") ["+" | "-"] digit+\n\nNote that the integer and exponent parts are always interpreted using\nradix 10. For example, ``077e010`` is legal, and denotes the same\nnumber as ``77e10``. The allowed range of floating point literals is\nimplementation-dependent. Some examples of floating point literals:\n\n 3.14 10. .001 1e100 3.14e-10 0e0\n\nNote that numeric literals do not include a sign; a phrase like ``-1``\nis actually an expression composed of the unary operator ``-`` and the\nliteral ``1``.\n',
'for': '\nThe ``for`` statement\n*********************\n\nThe ``for`` statement is used to iterate over the elements of a\nsequence (such as a string, tuple or list) or other iterable object:\n\n for_stmt ::= "for" target_list "in" expression_list ":" suite\n ["else" ":" suite]\n\nThe expression list is evaluated once; it should yield an iterable\nobject. An iterator is created for the result of the\n``expression_list``. The suite is then executed once for each item\nprovided by the iterator, in the order of ascending indices. Each\nitem in turn is assigned to the target list using the standard rules\nfor assignments (see *Assignment statements*), and then the suite is\nexecuted. When the items are exhausted (which is immediately when the\nsequence is empty or an iterator raises a ``StopIteration``\nexception), the suite in the ``else`` clause, if present, is executed,\nand the loop terminates.\n\nA ``break`` statement executed in the first suite terminates the loop\nwithout executing the ``else`` clause\'s suite. A ``continue``\nstatement executed in the first suite skips the rest of the suite and\ncontinues with the next item, or with the ``else`` clause if there was\nno next item.\n\nThe suite may assign to the variable(s) in the target list; this does\nnot affect the next item assigned to it.\n\nNames in the target list are not deleted when the loop is finished,\nbut if the sequence is empty, it will not have been assigned to at all\nby the loop. Hint: the built-in function ``range()`` returns an\niterator of integers suitable to emulate the effect of Pascal\'s ``for\ni := a to b do``; e.g., ``list(range(3))`` returns the list ``[0, 1,\n2]``.\n\nNote: There is a subtlety when the sequence is being modified by the loop\n (this can only occur for mutable sequences, i.e. lists). An\n internal counter is used to keep track of which item is used next,\n and this is incremented on each iteration. 
When this counter has\n reached the length of the sequence the loop terminates. This means\n that if the suite deletes the current (or a previous) item from the\n sequence, the next item will be skipped (since it gets the index of\n the current item which has already been treated). Likewise, if the\n suite inserts an item in the sequence before the current item, the\n current item will be treated again the next time through the loop.\n This can lead to nasty bugs that can be avoided by making a\n temporary copy using a slice of the whole sequence, e.g.,\n\n for x in a[:]:\n if x < 0: a.remove(x)\n',
- 'formatstrings': '\nFormat String Syntax\n********************\n\nThe ``str.format()`` method and the ``Formatter`` class share the same\nsyntax for format strings (although in the case of ``Formatter``,\nsubclasses can define their own format string syntax).\n\nFormat strings contain "replacement fields" surrounded by curly braces\n``{}``. Anything that is not contained in braces is considered literal\ntext, which is copied unchanged to the output. If you need to include\na brace character in the literal text, it can be escaped by doubling:\n``{{`` and ``}}``.\n\nThe grammar for a replacement field is as follows:\n\n replacement_field ::= "{" [field_name] ["!" conversion] [":" format_spec] "}"\n field_name ::= arg_name ("." attribute_name | "[" element_index "]")*\n arg_name ::= [identifier | integer]\n attribute_name ::= identifier\n element_index ::= integer | index_string\n index_string ::= <any source character except "]"> +\n conversion ::= "r" | "s" | "a"\n format_spec ::= <described in the next section>\n\nIn less formal terms, the replacement field can start with a\n*field_name* that specifies the object whose value is to be formatted\nand inserted into the output instead of the replacement field. The\n*field_name* is optionally followed by a *conversion* field, which is\npreceded by an exclamation point ``\'!\'``, and a *format_spec*, which\nis preceded by a colon ``\':\'``. These specify a non-default format\nfor the replacement value.\n\nSee also the *Format Specification Mini-Language* section.\n\nThe *field_name* itself begins with an *arg_name* that is either a\nnumber or a keyword. If it\'s a number, it refers to a positional\nargument, and if it\'s a keyword, it refers to a named keyword\nargument. If the numerical arg_names in a format string are 0, 1, 2,\n... in sequence, they can all be omitted (not just some) and the\nnumbers 0, 1, 2, ... 
will be automatically inserted in that order.\nBecause *arg_name* is not quote-delimited, it is not possible to\nspecify arbitrary dictionary keys (e.g., the strings ``\'10\'`` or\n``\':-]\'``) within a format string. The *arg_name* can be followed by\nany number of index or attribute expressions. An expression of the\nform ``\'.name\'`` selects the named attribute using ``getattr()``,\nwhile an expression of the form ``\'[index]\'`` does an index lookup\nusing ``__getitem__()``.\n\nChanged in version 3.1: The positional argument specifiers can be\nomitted, so ``\'{} {}\'`` is equivalent to ``\'{0} {1}\'``.\n\nSome simple format string examples:\n\n "First, thou shalt count to {0}" # References first positional argument\n "Bring me a {}" # Implicitly references the first positional argument\n "From {} to {}" # Same as "From {0} to {1}"\n "My quest is {name}" # References keyword argument \'name\'\n "Weight in tons {0.weight}" # \'weight\' attribute of first positional arg\n "Units destroyed: {players[0]}" # First element of keyword argument \'players\'.\n\nThe *conversion* field causes a type coercion before formatting.\nNormally, the job of formatting a value is done by the\n``__format__()`` method of the value itself. However, in some cases\nit is desirable to force a type to be formatted as a string,\noverriding its own definition of formatting. 
By converting the value\nto a string before calling ``__format__()``, the normal formatting\nlogic is bypassed.\n\nThree conversion flags are currently supported: ``\'!s\'`` which calls\n``str()`` on the value, ``\'!r\'`` which calls ``repr()`` and ``\'!a\'``\nwhich calls ``ascii()``.\n\nSome examples:\n\n "Harold\'s a clever {0!s}" # Calls str() on the argument first\n "Bring out the holy {name!r}" # Calls repr() on the argument first\n "More {!a}" # Calls ascii() on the argument first\n\nThe *format_spec* field contains a specification of how the value\nshould be presented, including such details as field width, alignment,\npadding, decimal precision and so on. Each value type can define its\nown "formatting mini-language" or interpretation of the *format_spec*.\n\nMost built-in types support a common formatting mini-language, which\nis described in the next section.\n\nA *format_spec* field can also include nested replacement fields\nwithin it. These nested replacement fields can contain only a field\nname; conversion flags and format specifications are not allowed. The\nreplacement fields within the format_spec are substituted before the\n*format_spec* string is interpreted. This allows the formatting of a\nvalue to be dynamically specified.\n\nSee the *Format examples* section for some examples.\n\n\nFormat Specification Mini-Language\n==================================\n\n"Format specifications" are used within replacement fields contained\nwithin a format string to define how individual values are presented\n(see *Format String Syntax*). They can also be passed directly to the\nbuilt-in ``format()`` function. 
Each formattable type may define how\nthe format specification is to be interpreted.\n\nMost built-in types implement the following options for format\nspecifications, although some of the formatting options are only\nsupported by the numeric types.\n\nA general convention is that an empty format string (``""``) produces\nthe same result as if you had called ``str()`` on the value. A non-\nempty format string typically modifies the result.\n\nThe general form of a *standard format specifier* is:\n\n format_spec ::= [[fill]align][sign][#][0][width][,][.precision][type]\n fill ::= <a character other than \'}\'>\n align ::= "<" | ">" | "=" | "^"\n sign ::= "+" | "-" | " "\n width ::= integer\n precision ::= integer\n type ::= "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%"\n\nThe *fill* character can be any character other than \'{\' or \'}\'. The\npresence of a fill character is signaled by the character following\nit, which must be one of the alignment options. If the second\ncharacter of *format_spec* is not a valid alignment option, then it is\nassumed that both the fill character and the alignment option are\nabsent.\n\nThe meaning of the various alignment options is as follows:\n\n +-----------+------------------------------------------------------------+\n | Option | Meaning |\n +===========+============================================================+\n | ``\'<\'`` | Forces the field to be left-aligned within the available |\n | | space (this is the default for most objects). |\n +-----------+------------------------------------------------------------+\n | ``\'>\'`` | Forces the field to be right-aligned within the available |\n | | space (this is the default for numbers). |\n +-----------+------------------------------------------------------------+\n | ``\'=\'`` | Forces the padding to be placed after the sign (if any) |\n | | but before the digits. This is used for printing fields |\n | | in the form \'+000000120\'. 
This alignment option is only |\n | | valid for numeric types. |\n +-----------+------------------------------------------------------------+\n | ``\'^\'`` | Forces the field to be centered within the available |\n | | space. |\n +-----------+------------------------------------------------------------+\n\nNote that unless a minimum field width is defined, the field width\nwill always be the same size as the data to fill it, so that the\nalignment option has no meaning in this case.\n\nThe *sign* option is only valid for number types, and can be one of\nthe following:\n\n +-----------+------------------------------------------------------------+\n | Option | Meaning |\n +===========+============================================================+\n | ``\'+\'`` | indicates that a sign should be used for both positive as |\n | | well as negative numbers. |\n +-----------+------------------------------------------------------------+\n | ``\'-\'`` | indicates that a sign should be used only for negative |\n | | numbers (this is the default behavior). |\n +-----------+------------------------------------------------------------+\n | space | indicates that a leading space should be used on positive |\n | | numbers, and a minus sign on negative numbers. |\n +-----------+------------------------------------------------------------+\n\nThe ``\'#\'`` option causes the "alternate form" to be used for the\nconversion. The alternate form is defined differently for different\ntypes. This option is only valid for integer, float, complex and\nDecimal types. For integers, when binary, octal, or hexadecimal output\nis used, this option adds the prefix respective ``\'0b\'``, ``\'0o\'``, or\n``\'0x\'`` to the output value. For floats, complex and Decimal the\nalternate form causes the result of the conversion to always contain a\ndecimal-point character, even if no digits follow it. 
Normally, a\ndecimal-point character appears in the result of these conversions\nonly if a digit follows it. In addition, for ``\'g\'`` and ``\'G\'``\nconversions, trailing zeros are not removed from the result.\n\nThe ``\',\'`` option signals the use of a comma for a thousands\nseparator. For a locale aware separator, use the ``\'n\'`` integer\npresentation type instead.\n\nChanged in version 3.1: Added the ``\',\'`` option (see also **PEP\n378**).\n\n*width* is a decimal integer defining the minimum field width. If not\nspecified, then the field width will be determined by the content.\n\nIf the *width* field is preceded by a zero (``\'0\'``) character, this\nenables zero-padding. This is equivalent to an *alignment* type of\n``\'=\'`` and a *fill* character of ``\'0\'``.\n\nThe *precision* is a decimal number indicating how many digits should\nbe displayed after the decimal point for a floating point value\nformatted with ``\'f\'`` and ``\'F\'``, or before and after the decimal\npoint for a floating point value formatted with ``\'g\'`` or ``\'G\'``.\nFor non-number types the field indicates the maximum field size - in\nother words, how many characters will be used from the field content.\nThe *precision* is not allowed for integer values.\n\nFinally, the *type* determines how the data should be presented.\n\nThe available string presentation types are:\n\n +-----------+------------------------------------------------------------+\n | Type | Meaning |\n +===========+============================================================+\n | ``\'s\'`` | String format. This is the default type for strings and |\n | | may be omitted. |\n +-----------+------------------------------------------------------------+\n | None | The same as ``\'s\'``. 
|\n +-----------+------------------------------------------------------------+\n\nThe available integer presentation types are:\n\n +-----------+------------------------------------------------------------+\n | Type | Meaning |\n +===========+============================================================+\n | ``\'b\'`` | Binary format. Outputs the number in base 2. |\n +-----------+------------------------------------------------------------+\n | ``\'c\'`` | Character. Converts the integer to the corresponding |\n | | unicode character before printing. |\n +-----------+------------------------------------------------------------+\n | ``\'d\'`` | Decimal Integer. Outputs the number in base 10. |\n +-----------+------------------------------------------------------------+\n | ``\'o\'`` | Octal format. Outputs the number in base 8. |\n +-----------+------------------------------------------------------------+\n | ``\'x\'`` | Hex format. Outputs the number in base 16, using lower- |\n | | case letters for the digits above 9. |\n +-----------+------------------------------------------------------------+\n | ``\'X\'`` | Hex format. Outputs the number in base 16, using upper- |\n | | case letters for the digits above 9. |\n +-----------+------------------------------------------------------------+\n | ``\'n\'`` | Number. This is the same as ``\'d\'``, except that it uses |\n | | the current locale setting to insert the appropriate |\n | | number separator characters. |\n +-----------+------------------------------------------------------------+\n | None | The same as ``\'d\'``. |\n +-----------+------------------------------------------------------------+\n\nIn addition to the above presentation types, integers can be formatted\nwith the floating point presentation types listed below (except\n``\'n\'`` and None). 
When doing so, ``float()`` is used to convert the\ninteger to a floating point number before formatting.\n\nThe available presentation types for floating point and decimal values\nare:\n\n +-----------+------------------------------------------------------------+\n | Type | Meaning |\n +===========+============================================================+\n | ``\'e\'`` | Exponent notation. Prints the number in scientific |\n | | notation using the letter \'e\' to indicate the exponent. |\n +-----------+------------------------------------------------------------+\n | ``\'E\'`` | Exponent notation. Same as ``\'e\'`` except it uses an upper |\n | | case \'E\' as the separator character. |\n +-----------+------------------------------------------------------------+\n | ``\'f\'`` | Fixed point. Displays the number as a fixed-point number. |\n +-----------+------------------------------------------------------------+\n | ``\'F\'`` | Fixed point. Same as ``\'f\'``, but converts ``nan`` to |\n | | ``NAN`` and ``inf`` to ``INF``. |\n +-----------+------------------------------------------------------------+\n | ``\'g\'`` | General format. For a given precision ``p >= 1``, this |\n | | rounds the number to ``p`` significant digits and then |\n | | formats the result in either fixed-point format or in |\n | | scientific notation, depending on its magnitude. The |\n | | precise rules are as follows: suppose that the result |\n | | formatted with presentation type ``\'e\'`` and precision |\n | | ``p-1`` would have exponent ``exp``. Then if ``-4 <= exp |\n | | < p``, the number is formatted with presentation type |\n | | ``\'f\'`` and precision ``p-1-exp``. Otherwise, the number |\n | | is formatted with presentation type ``\'e\'`` and precision |\n | | ``p-1``. In both cases insignificant trailing zeros are |\n | | removed from the significand, and the decimal point is |\n | | also removed if there are no remaining digits following |\n | | it. 
Positive and negative infinity, positive and negative |\n | | zero, and nans, are formatted as ``inf``, ``-inf``, ``0``, |\n | | ``-0`` and ``nan`` respectively, regardless of the |\n | | precision. A precision of ``0`` is treated as equivalent |\n | | to a precision of ``1``. |\n +-----------+------------------------------------------------------------+\n | ``\'G\'`` | General format. Same as ``\'g\'`` except switches to ``\'E\'`` |\n | | if the number gets too large. The representations of |\n | | infinity and NaN are uppercased, too. |\n +-----------+------------------------------------------------------------+\n | ``\'n\'`` | Number. This is the same as ``\'g\'``, except that it uses |\n | | the current locale setting to insert the appropriate |\n | | number separator characters. |\n +-----------+------------------------------------------------------------+\n | ``\'%\'`` | Percentage. Multiplies the number by 100 and displays in |\n | | fixed (``\'f\'``) format, followed by a percent sign. |\n +-----------+------------------------------------------------------------+\n | None | Similar to ``\'g\'``, except with at least one digit past |\n | | the decimal point and a default precision of 12. This is |\n | | intended to match ``str()``, except you can add the other |\n | | format modifiers. |\n +-----------+------------------------------------------------------------+\n\n\nFormat examples\n===============\n\nThis section contains examples of the new format syntax and comparison\nwith the old ``%``-formatting.\n\nIn most of the cases the syntax is similar to the old\n``%``-formatting, with the addition of the ``{}`` and with ``:`` used\ninstead of ``%``. 
For example, ``\'%03.2f\'`` can be translated to\n``\'{:03.2f}\'``.\n\nThe new format syntax also supports new and different options, shown\nin the follow examples.\n\nAccessing arguments by position:\n\n >>> \'{0}, {1}, {2}\'.format(\'a\', \'b\', \'c\')\n \'a, b, c\'\n >>> \'{}, {}, {}\'.format(\'a\', \'b\', \'c\') # 3.1+ only\n \'a, b, c\'\n >>> \'{2}, {1}, {0}\'.format(\'a\', \'b\', \'c\')\n \'c, b, a\'\n >>> \'{2}, {1}, {0}\'.format(*\'abc\') # unpacking argument sequence\n \'c, b, a\'\n >>> \'{0}{1}{0}\'.format(\'abra\', \'cad\') # arguments\' indices can be repeated\n \'abracadabra\'\n\nAccessing arguments by name:\n\n >>> \'Coordinates: {latitude}, {longitude}\'.format(latitude=\'37.24N\', longitude=\'-115.81W\')\n \'Coordinates: 37.24N, -115.81W\'\n >>> coord = {\'latitude\': \'37.24N\', \'longitude\': \'-115.81W\'}\n >>> \'Coordinates: {latitude}, {longitude}\'.format(**coord)\n \'Coordinates: 37.24N, -115.81W\'\n\nAccessing arguments\' attributes:\n\n >>> c = 3-5j\n >>> (\'The complex number {0} is formed from the real part {0.real} \'\n ... \'and the imaginary part {0.imag}.\').format(c)\n \'The complex number (3-5j) is formed from the real part 3.0 and the imaginary part -5.0.\'\n >>> class Point:\n ... def __init__(self, x, y):\n ... self.x, self.y = x, y\n ... def __str__(self):\n ... 
return \'Point({self.x}, {self.y})\'.format(self=self)\n ...\n >>> str(Point(4, 2))\n \'Point(4, 2)\'\n\nAccessing arguments\' items:\n\n >>> coord = (3, 5)\n >>> \'X: {0[0]}; Y: {0[1]}\'.format(coord)\n \'X: 3; Y: 5\'\n\nReplacing ``%s`` and ``%r``:\n\n >>> "repr() shows quotes: {!r}; str() doesn\'t: {!s}".format(\'test1\', \'test2\')\n "repr() shows quotes: \'test1\'; str() doesn\'t: test2"\n\nAligning the text and specifying a width:\n\n >>> \'{:<30}\'.format(\'left aligned\')\n \'left aligned \'\n >>> \'{:>30}\'.format(\'right aligned\')\n \' right aligned\'\n >>> \'{:^30}\'.format(\'centered\')\n \' centered \'\n >>> \'{:*^30}\'.format(\'centered\') # use \'*\' as a fill char\n \'***********centered***********\'\n\nReplacing ``%+f``, ``%-f``, and ``% f`` and specifying a sign:\n\n >>> \'{:+f}; {:+f}\'.format(3.14, -3.14) # show it always\n \'+3.140000; -3.140000\'\n >>> \'{: f}; {: f}\'.format(3.14, -3.14) # show a space for positive numbers\n \' 3.140000; -3.140000\'\n >>> \'{:-f}; {:-f}\'.format(3.14, -3.14) # show only the minus -- same as \'{:f}; {:f}\'\n \'3.140000; -3.140000\'\n\nReplacing ``%x`` and ``%o`` and converting the value to different\nbases:\n\n >>> # format also supports binary numbers\n >>> "int: {0:d}; hex: {0:x}; oct: {0:o}; bin: {0:b}".format(42)\n \'int: 42; hex: 2a; oct: 52; bin: 101010\'\n >>> # with 0x, 0o, or 0b as prefix:\n >>> "int: {0:d}; hex: {0:#x}; oct: {0:#o}; bin: {0:#b}".format(42)\n \'int: 42; hex: 0x2a; oct: 0o52; bin: 0b101010\'\n\nUsing the comma as a thousands separator:\n\n >>> \'{:,}\'.format(1234567890)\n \'1,234,567,890\'\n\nExpressing a percentage:\n\n >>> points = 19\n >>> total = 22\n >>> \'Correct answers: {:.2%}\'.format(points/total)\n \'Correct answers: 86.36%\'\n\nUsing type-specific formatting:\n\n >>> import datetime\n >>> d = datetime.datetime(2010, 7, 4, 12, 15, 58)\n >>> \'{:%Y-%m-%d %H:%M:%S}\'.format(d)\n \'2010-07-04 12:15:58\'\n\nNesting arguments and more complex examples:\n\n >>> for align, text 
in zip(\'<^>\', [\'left\', \'center\', \'right\']):\n ... \'{0:{fill}{align}16}\'.format(text, fill=align, align=align)\n ...\n \'left<<<<<<<<<<<<\'\n \'^^^^^center^^^^^\'\n \'>>>>>>>>>>>right\'\n >>>\n >>> octets = [192, 168, 0, 1]\n >>> \'{:02X}{:02X}{:02X}{:02X}\'.format(*octets)\n \'C0A80001\'\n >>> int(_, 16)\n 3232235521\n >>>\n >>> width = 5\n >>> for num in range(5,12):\n ... for base in \'dXob\':\n ... print(\'{0:{width}{base}}\'.format(num, base=base, width=width), end=\' \')\n ... print()\n ...\n 5 5 5 101\n 6 6 6 110\n 7 7 7 111\n 8 8 10 1000\n 9 9 11 1001\n 10 A 12 1010\n 11 B 13 1011\n',
- 'function': '\nFunction definitions\n********************\n\nA function definition defines a user-defined function object (see\nsection *The standard type hierarchy*):\n\n funcdef ::= [decorators] "def" funcname "(" [parameter_list] ")" ["->" expression] ":" suite\n decorators ::= decorator+\n decorator ::= "@" dotted_name ["(" [parameter_list [","]] ")"] NEWLINE\n dotted_name ::= identifier ("." identifier)*\n parameter_list ::= (defparameter ",")*\n ( "*" [parameter] ("," defparameter)*\n [, "**" parameter]\n | "**" parameter\n | defparameter [","] )\n parameter ::= identifier [":" expression]\n defparameter ::= parameter ["=" expression]\n funcname ::= identifier\n\nA function definition is an executable statement. Its execution binds\nthe function name in the current local namespace to a function object\n(a wrapper around the executable code for the function). This\nfunction object contains a reference to the current global namespace\nas the global namespace to be used when the function is called.\n\nThe function definition does not execute the function body; this gets\nexecuted only when the function is called. [3]\n\nA function definition may be wrapped by one or more *decorator*\nexpressions. Decorator expressions are evaluated when the function is\ndefined, in the scope that contains the function definition. The\nresult must be a callable, which is invoked with the function object\nas the only argument. The returned value is bound to the function name\ninstead of the function object. Multiple decorators are applied in\nnested fashion. 
For example, the following code\n\n @f1(arg)\n @f2\n def func(): pass\n\nis equivalent to\n\n def func(): pass\n func = f1(arg)(f2(func))\n\nWhen one or more parameters have the form *parameter* ``=``\n*expression*, the function is said to have "default parameter values."\nFor a parameter with a default value, the corresponding argument may\nbe omitted from a call, in which case the parameter\'s default value is\nsubstituted. If a parameter has a default value, all following\nparameters up until the "``*``" must also have a default value ---\nthis is a syntactic restriction that is not expressed by the grammar.\n\n**Default parameter values are evaluated when the function definition\nis executed.** This means that the expression is evaluated once, when\nthe function is defined, and that the same "pre-computed" value is\nused for each call. This is especially important to understand when a\ndefault parameter is a mutable object, such as a list or a dictionary:\nif the function modifies the object (e.g. by appending an item to a\nlist), the default value is in effect modified. This is generally not\nwhat was intended. A way around this is to use ``None`` as the\ndefault, and explicitly test for it in the body of the function, e.g.:\n\n def whats_on_the_telly(penguin=None):\n if penguin is None:\n penguin = []\n penguin.append("property of the zoo")\n return penguin\n\nFunction call semantics are described in more detail in section\n*Calls*. A function call always assigns values to all parameters\nmentioned in the parameter list, either from position arguments, from\nkeyword arguments, or from default values. If the form\n"``*identifier``" is present, it is initialized to a tuple receiving\nany excess positional parameters, defaulting to the empty tuple. If\nthe form "``**identifier``" is present, it is initialized to a new\ndictionary receiving any excess keyword arguments, defaulting to a new\nempty dictionary. 
Parameters after "``*``" or "``*identifier``" are\nkeyword-only parameters and may only be passed used keyword arguments.\n\nParameters may have annotations of the form "``: expression``"\nfollowing the parameter name. Any parameter may have an annotation\neven those of the form ``*identifier`` or ``**identifier``. Functions\nmay have "return" annotation of the form "``-> expression``" after the\nparameter list. These annotations can be any valid Python expression\nand are evaluated when the function definition is executed.\nAnnotations may be evaluated in a different order than they appear in\nthe source code. The presence of annotations does not change the\nsemantics of a function. The annotation values are available as\nvalues of a dictionary keyed by the parameters\' names in the\n``__annotations__`` attribute of the function object.\n\nIt is also possible to create anonymous functions (functions not bound\nto a name), for immediate use in expressions. This uses lambda forms,\ndescribed in section *Lambdas*. Note that the lambda form is merely a\nshorthand for a simplified function definition; a function defined in\na "``def``" statement can be passed around or assigned to another name\njust like a function defined by a lambda form. The "``def``" form is\nactually more powerful since it allows the execution of multiple\nstatements and annotations.\n\n**Programmer\'s note:** Functions are first-class objects. A "``def``"\nform executed inside a function definition defines a local function\nthat can be returned or passed around. Free variables used in the\nnested function can access the local variables of the function\ncontaining the def. See section *Naming and binding* for details.\n',
+ 'formatstrings': '\nFormat String Syntax\n********************\n\nThe ``str.format()`` method and the ``Formatter`` class share the same\nsyntax for format strings (although in the case of ``Formatter``,\nsubclasses can define their own format string syntax).\n\nFormat strings contain "replacement fields" surrounded by curly braces\n``{}``. Anything that is not contained in braces is considered literal\ntext, which is copied unchanged to the output. If you need to include\na brace character in the literal text, it can be escaped by doubling:\n``{{`` and ``}}``.\n\nThe grammar for a replacement field is as follows:\n\n replacement_field ::= "{" [field_name] ["!" conversion] [":" format_spec] "}"\n field_name ::= arg_name ("." attribute_name | "[" element_index "]")*\n arg_name ::= [identifier | integer]\n attribute_name ::= identifier\n element_index ::= integer | index_string\n index_string ::= <any source character except "]"> +\n conversion ::= "r" | "s" | "a"\n format_spec ::= <described in the next section>\n\nIn less formal terms, the replacement field can start with a\n*field_name* that specifies the object whose value is to be formatted\nand inserted into the output instead of the replacement field. The\n*field_name* is optionally followed by a *conversion* field, which is\npreceded by an exclamation point ``\'!\'``, and a *format_spec*, which\nis preceded by a colon ``\':\'``. These specify a non-default format\nfor the replacement value.\n\nSee also the *Format Specification Mini-Language* section.\n\nThe *field_name* itself begins with an *arg_name* that is either a\nnumber or a keyword. If it\'s a number, it refers to a positional\nargument, and if it\'s a keyword, it refers to a named keyword\nargument. If the numerical arg_names in a format string are 0, 1, 2,\n... in sequence, they can all be omitted (not just some) and the\nnumbers 0, 1, 2, ... 
will be automatically inserted in that order.\nBecause *arg_name* is not quote-delimited, it is not possible to\nspecify arbitrary dictionary keys (e.g., the strings ``\'10\'`` or\n``\':-]\'``) within a format string. The *arg_name* can be followed by\nany number of index or attribute expressions. An expression of the\nform ``\'.name\'`` selects the named attribute using ``getattr()``,\nwhile an expression of the form ``\'[index]\'`` does an index lookup\nusing ``__getitem__()``.\n\nChanged in version 3.1: The positional argument specifiers can be\nomitted, so ``\'{} {}\'`` is equivalent to ``\'{0} {1}\'``.\n\nSome simple format string examples:\n\n "First, thou shalt count to {0}" # References first positional argument\n "Bring me a {}" # Implicitly references the first positional argument\n "From {} to {}" # Same as "From {0} to {1}"\n "My quest is {name}" # References keyword argument \'name\'\n "Weight in tons {0.weight}" # \'weight\' attribute of first positional arg\n "Units destroyed: {players[0]}" # First element of keyword argument \'players\'.\n\nThe *conversion* field causes a type coercion before formatting.\nNormally, the job of formatting a value is done by the\n``__format__()`` method of the value itself. However, in some cases\nit is desirable to force a type to be formatted as a string,\noverriding its own definition of formatting. 
By converting the value\nto a string before calling ``__format__()``, the normal formatting\nlogic is bypassed.\n\nThree conversion flags are currently supported: ``\'!s\'`` which calls\n``str()`` on the value, ``\'!r\'`` which calls ``repr()`` and ``\'!a\'``\nwhich calls ``ascii()``.\n\nSome examples:\n\n "Harold\'s a clever {0!s}" # Calls str() on the argument first\n "Bring out the holy {name!r}" # Calls repr() on the argument first\n "More {!a}" # Calls ascii() on the argument first\n\nThe *format_spec* field contains a specification of how the value\nshould be presented, including such details as field width, alignment,\npadding, decimal precision and so on. Each value type can define its\nown "formatting mini-language" or interpretation of the *format_spec*.\n\nMost built-in types support a common formatting mini-language, which\nis described in the next section.\n\nA *format_spec* field can also include nested replacement fields\nwithin it. These nested replacement fields can contain only a field\nname; conversion flags and format specifications are not allowed. The\nreplacement fields within the format_spec are substituted before the\n*format_spec* string is interpreted. This allows the formatting of a\nvalue to be dynamically specified.\n\nSee the *Format examples* section for some examples.\n\n\nFormat Specification Mini-Language\n==================================\n\n"Format specifications" are used within replacement fields contained\nwithin a format string to define how individual values are presented\n(see *Format String Syntax*). They can also be passed directly to the\nbuilt-in ``format()`` function. 
Each formattable type may define how\nthe format specification is to be interpreted.\n\nMost built-in types implement the following options for format\nspecifications, although some of the formatting options are only\nsupported by the numeric types.\n\nA general convention is that an empty format string (``""``) produces\nthe same result as if you had called ``str()`` on the value. A non-\nempty format string typically modifies the result.\n\nThe general form of a *standard format specifier* is:\n\n format_spec ::= [[fill]align][sign][#][0][width][,][.precision][type]\n fill ::= <a character other than \'{\' or \'}\'>\n align ::= "<" | ">" | "=" | "^"\n sign ::= "+" | "-" | " "\n width ::= integer\n precision ::= integer\n type ::= "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%"\n\nThe *fill* character can be any character other than \'{\' or \'}\'. The\npresence of a fill character is signaled by the character following\nit, which must be one of the alignment options. If the second\ncharacter of *format_spec* is not a valid alignment option, then it is\nassumed that both the fill character and the alignment option are\nabsent.\n\nThe meaning of the various alignment options is as follows:\n\n +-----------+------------------------------------------------------------+\n | Option | Meaning |\n +===========+============================================================+\n | ``\'<\'`` | Forces the field to be left-aligned within the available |\n | | space (this is the default for most objects). |\n +-----------+------------------------------------------------------------+\n | ``\'>\'`` | Forces the field to be right-aligned within the available |\n | | space (this is the default for numbers). |\n +-----------+------------------------------------------------------------+\n | ``\'=\'`` | Forces the padding to be placed after the sign (if any) |\n | | but before the digits. 
This is used for printing fields |\n | | in the form \'+000000120\'. This alignment option is only |\n | | valid for numeric types. |\n +-----------+------------------------------------------------------------+\n | ``\'^\'`` | Forces the field to be centered within the available |\n | | space. |\n +-----------+------------------------------------------------------------+\n\nNote that unless a minimum field width is defined, the field width\nwill always be the same size as the data to fill it, so that the\nalignment option has no meaning in this case.\n\nThe *sign* option is only valid for number types, and can be one of\nthe following:\n\n +-----------+------------------------------------------------------------+\n | Option | Meaning |\n +===========+============================================================+\n | ``\'+\'`` | indicates that a sign should be used for both positive as |\n | | well as negative numbers. |\n +-----------+------------------------------------------------------------+\n | ``\'-\'`` | indicates that a sign should be used only for negative |\n | | numbers (this is the default behavior). |\n +-----------+------------------------------------------------------------+\n | space | indicates that a leading space should be used on positive |\n | | numbers, and a minus sign on negative numbers. |\n +-----------+------------------------------------------------------------+\n\nThe ``\'#\'`` option causes the "alternate form" to be used for the\nconversion. The alternate form is defined differently for different\ntypes. This option is only valid for integer, float, complex and\nDecimal types. For integers, when binary, octal, or hexadecimal output\nis used, this option adds the respective prefix ``\'0b\'``, ``\'0o\'``, or\n``\'0x\'`` to the output value. For floats, complex and Decimal the\nalternate form causes the result of the conversion to always contain a\ndecimal-point character, even if no digits follow it. 
Normally, a\ndecimal-point character appears in the result of these conversions\nonly if a digit follows it. In addition, for ``\'g\'`` and ``\'G\'``\nconversions, trailing zeros are not removed from the result.\n\nThe ``\',\'`` option signals the use of a comma for a thousands\nseparator. For a locale aware separator, use the ``\'n\'`` integer\npresentation type instead.\n\nChanged in version 3.1: Added the ``\',\'`` option (see also **PEP\n378**).\n\n*width* is a decimal integer defining the minimum field width. If not\nspecified, then the field width will be determined by the content.\n\nPreceding the *width* field by a zero (``\'0\'``) character enables\nsign-aware zero-padding for numeric types. This is equivalent to a\n*fill* character of ``\'0\'`` with an *alignment* type of ``\'=\'``.\n\nThe *precision* is a decimal number indicating how many digits should\nbe displayed after the decimal point for a floating point value\nformatted with ``\'f\'`` and ``\'F\'``, or before and after the decimal\npoint for a floating point value formatted with ``\'g\'`` or ``\'G\'``.\nFor non-number types the field indicates the maximum field size - in\nother words, how many characters will be used from the field content.\nThe *precision* is not allowed for integer values.\n\nFinally, the *type* determines how the data should be presented.\n\nThe available string presentation types are:\n\n +-----------+------------------------------------------------------------+\n | Type | Meaning |\n +===========+============================================================+\n | ``\'s\'`` | String format. This is the default type for strings and |\n | | may be omitted. |\n +-----------+------------------------------------------------------------+\n | None | The same as ``\'s\'``. 
|\n +-----------+------------------------------------------------------------+\n\nThe available integer presentation types are:\n\n +-----------+------------------------------------------------------------+\n | Type | Meaning |\n +===========+============================================================+\n | ``\'b\'`` | Binary format. Outputs the number in base 2. |\n +-----------+------------------------------------------------------------+\n | ``\'c\'`` | Character. Converts the integer to the corresponding |\n | | unicode character before printing. |\n +-----------+------------------------------------------------------------+\n | ``\'d\'`` | Decimal Integer. Outputs the number in base 10. |\n +-----------+------------------------------------------------------------+\n | ``\'o\'`` | Octal format. Outputs the number in base 8. |\n +-----------+------------------------------------------------------------+\n | ``\'x\'`` | Hex format. Outputs the number in base 16, using lower- |\n | | case letters for the digits above 9. |\n +-----------+------------------------------------------------------------+\n | ``\'X\'`` | Hex format. Outputs the number in base 16, using upper- |\n | | case letters for the digits above 9. |\n +-----------+------------------------------------------------------------+\n | ``\'n\'`` | Number. This is the same as ``\'d\'``, except that it uses |\n | | the current locale setting to insert the appropriate |\n | | number separator characters. |\n +-----------+------------------------------------------------------------+\n | None | The same as ``\'d\'``. |\n +-----------+------------------------------------------------------------+\n\nIn addition to the above presentation types, integers can be formatted\nwith the floating point presentation types listed below (except\n``\'n\'`` and None). 
When doing so, ``float()`` is used to convert the\ninteger to a floating point number before formatting.\n\nThe available presentation types for floating point and decimal values\nare:\n\n +-----------+------------------------------------------------------------+\n | Type | Meaning |\n +===========+============================================================+\n | ``\'e\'`` | Exponent notation. Prints the number in scientific |\n | | notation using the letter \'e\' to indicate the exponent. |\n +-----------+------------------------------------------------------------+\n | ``\'E\'`` | Exponent notation. Same as ``\'e\'`` except it uses an upper |\n | | case \'E\' as the separator character. |\n +-----------+------------------------------------------------------------+\n | ``\'f\'`` | Fixed point. Displays the number as a fixed-point number. |\n +-----------+------------------------------------------------------------+\n | ``\'F\'`` | Fixed point. Same as ``\'f\'``, but converts ``nan`` to |\n | | ``NAN`` and ``inf`` to ``INF``. |\n +-----------+------------------------------------------------------------+\n | ``\'g\'`` | General format. For a given precision ``p >= 1``, this |\n | | rounds the number to ``p`` significant digits and then |\n | | formats the result in either fixed-point format or in |\n | | scientific notation, depending on its magnitude. The |\n | | precise rules are as follows: suppose that the result |\n | | formatted with presentation type ``\'e\'`` and precision |\n | | ``p-1`` would have exponent ``exp``. Then if ``-4 <= exp |\n | | < p``, the number is formatted with presentation type |\n | | ``\'f\'`` and precision ``p-1-exp``. Otherwise, the number |\n | | is formatted with presentation type ``\'e\'`` and precision |\n | | ``p-1``. In both cases insignificant trailing zeros are |\n | | removed from the significand, and the decimal point is |\n | | also removed if there are no remaining digits following |\n | | it. 
Positive and negative infinity, positive and negative |\n | | zero, and nans, are formatted as ``inf``, ``-inf``, ``0``, |\n | | ``-0`` and ``nan`` respectively, regardless of the |\n | | precision. A precision of ``0`` is treated as equivalent |\n | | to a precision of ``1``. |\n +-----------+------------------------------------------------------------+\n | ``\'G\'`` | General format. Same as ``\'g\'`` except switches to ``\'E\'`` |\n | | if the number gets too large. The representations of |\n | | infinity and NaN are uppercased, too. |\n +-----------+------------------------------------------------------------+\n | ``\'n\'`` | Number. This is the same as ``\'g\'``, except that it uses |\n | | the current locale setting to insert the appropriate |\n | | number separator characters. |\n +-----------+------------------------------------------------------------+\n | ``\'%\'`` | Percentage. Multiplies the number by 100 and displays in |\n | | fixed (``\'f\'``) format, followed by a percent sign. |\n +-----------+------------------------------------------------------------+\n | None | Similar to ``\'g\'``, except with at least one digit past |\n | | the decimal point and a default precision of 12. This is |\n | | intended to match ``str()``, except you can add the other |\n | | format modifiers. |\n +-----------+------------------------------------------------------------+\n\n\nFormat examples\n===============\n\nThis section contains examples of the new format syntax and comparison\nwith the old ``%``-formatting.\n\nIn most of the cases the syntax is similar to the old\n``%``-formatting, with the addition of the ``{}`` and with ``:`` used\ninstead of ``%``. 
For example, ``\'%03.2f\'`` can be translated to\n``\'{:03.2f}\'``.\n\nThe new format syntax also supports new and different options, shown\nin the following examples.\n\nAccessing arguments by position:\n\n >>> \'{0}, {1}, {2}\'.format(\'a\', \'b\', \'c\')\n \'a, b, c\'\n >>> \'{}, {}, {}\'.format(\'a\', \'b\', \'c\') # 3.1+ only\n \'a, b, c\'\n >>> \'{2}, {1}, {0}\'.format(\'a\', \'b\', \'c\')\n \'c, b, a\'\n >>> \'{2}, {1}, {0}\'.format(*\'abc\') # unpacking argument sequence\n \'c, b, a\'\n >>> \'{0}{1}{0}\'.format(\'abra\', \'cad\') # arguments\' indices can be repeated\n \'abracadabra\'\n\nAccessing arguments by name:\n\n >>> \'Coordinates: {latitude}, {longitude}\'.format(latitude=\'37.24N\', longitude=\'-115.81W\')\n \'Coordinates: 37.24N, -115.81W\'\n >>> coord = {\'latitude\': \'37.24N\', \'longitude\': \'-115.81W\'}\n >>> \'Coordinates: {latitude}, {longitude}\'.format(**coord)\n \'Coordinates: 37.24N, -115.81W\'\n\nAccessing arguments\' attributes:\n\n >>> c = 3-5j\n >>> (\'The complex number {0} is formed from the real part {0.real} \'\n ... \'and the imaginary part {0.imag}.\').format(c)\n \'The complex number (3-5j) is formed from the real part 3.0 and the imaginary part -5.0.\'\n >>> class Point:\n ... def __init__(self, x, y):\n ... self.x, self.y = x, y\n ... def __str__(self):\n ... 
return \'Point({self.x}, {self.y})\'.format(self=self)\n ...\n >>> str(Point(4, 2))\n \'Point(4, 2)\'\n\nAccessing arguments\' items:\n\n >>> coord = (3, 5)\n >>> \'X: {0[0]}; Y: {0[1]}\'.format(coord)\n \'X: 3; Y: 5\'\n\nReplacing ``%s`` and ``%r``:\n\n >>> "repr() shows quotes: {!r}; str() doesn\'t: {!s}".format(\'test1\', \'test2\')\n "repr() shows quotes: \'test1\'; str() doesn\'t: test2"\n\nAligning the text and specifying a width:\n\n >>> \'{:<30}\'.format(\'left aligned\')\n \'left aligned \'\n >>> \'{:>30}\'.format(\'right aligned\')\n \' right aligned\'\n >>> \'{:^30}\'.format(\'centered\')\n \' centered \'\n >>> \'{:*^30}\'.format(\'centered\') # use \'*\' as a fill char\n \'***********centered***********\'\n\nReplacing ``%+f``, ``%-f``, and ``% f`` and specifying a sign:\n\n >>> \'{:+f}; {:+f}\'.format(3.14, -3.14) # show it always\n \'+3.140000; -3.140000\'\n >>> \'{: f}; {: f}\'.format(3.14, -3.14) # show a space for positive numbers\n \' 3.140000; -3.140000\'\n >>> \'{:-f}; {:-f}\'.format(3.14, -3.14) # show only the minus -- same as \'{:f}; {:f}\'\n \'3.140000; -3.140000\'\n\nReplacing ``%x`` and ``%o`` and converting the value to different\nbases:\n\n >>> # format also supports binary numbers\n >>> "int: {0:d}; hex: {0:x}; oct: {0:o}; bin: {0:b}".format(42)\n \'int: 42; hex: 2a; oct: 52; bin: 101010\'\n >>> # with 0x, 0o, or 0b as prefix:\n >>> "int: {0:d}; hex: {0:#x}; oct: {0:#o}; bin: {0:#b}".format(42)\n \'int: 42; hex: 0x2a; oct: 0o52; bin: 0b101010\'\n\nUsing the comma as a thousands separator:\n\n >>> \'{:,}\'.format(1234567890)\n \'1,234,567,890\'\n\nExpressing a percentage:\n\n >>> points = 19\n >>> total = 22\n >>> \'Correct answers: {:.2%}\'.format(points/total)\n \'Correct answers: 86.36%\'\n\nUsing type-specific formatting:\n\n >>> import datetime\n >>> d = datetime.datetime(2010, 7, 4, 12, 15, 58)\n >>> \'{:%Y-%m-%d %H:%M:%S}\'.format(d)\n \'2010-07-04 12:15:58\'\n\nNesting arguments and more complex examples:\n\n >>> for align, text 
in zip(\'<^>\', [\'left\', \'center\', \'right\']):\n ... \'{0:{fill}{align}16}\'.format(text, fill=align, align=align)\n ...\n \'left<<<<<<<<<<<<\'\n \'^^^^^center^^^^^\'\n \'>>>>>>>>>>>right\'\n >>>\n >>> octets = [192, 168, 0, 1]\n >>> \'{:02X}{:02X}{:02X}{:02X}\'.format(*octets)\n \'C0A80001\'\n >>> int(_, 16)\n 3232235521\n >>>\n >>> width = 5\n >>> for num in range(5,12):\n ... for base in \'dXob\':\n ... print(\'{0:{width}{base}}\'.format(num, base=base, width=width), end=\' \')\n ... print()\n ...\n 5 5 5 101\n 6 6 6 110\n 7 7 7 111\n 8 8 10 1000\n 9 9 11 1001\n 10 A 12 1010\n 11 B 13 1011\n',
+ 'function': '\nFunction definitions\n********************\n\nA function definition defines a user-defined function object (see\nsection *The standard type hierarchy*):\n\n funcdef ::= [decorators] "def" funcname "(" [parameter_list] ")" ["->" expression] ":" suite\n decorators ::= decorator+\n decorator ::= "@" dotted_name ["(" [parameter_list [","]] ")"] NEWLINE\n dotted_name ::= identifier ("." identifier)*\n parameter_list ::= (defparameter ",")*\n ( "*" [parameter] ("," defparameter)*\n [, "**" parameter]\n | "**" parameter\n | defparameter [","] )\n parameter ::= identifier [":" expression]\n defparameter ::= parameter ["=" expression]\n funcname ::= identifier\n\nA function definition is an executable statement. Its execution binds\nthe function name in the current local namespace to a function object\n(a wrapper around the executable code for the function). This\nfunction object contains a reference to the current global namespace\nas the global namespace to be used when the function is called.\n\nThe function definition does not execute the function body; this gets\nexecuted only when the function is called. [3]\n\nA function definition may be wrapped by one or more *decorator*\nexpressions. Decorator expressions are evaluated when the function is\ndefined, in the scope that contains the function definition. The\nresult must be a callable, which is invoked with the function object\nas the only argument. The returned value is bound to the function name\ninstead of the function object. Multiple decorators are applied in\nnested fashion. 
For example, the following code\n\n @f1(arg)\n @f2\n def func(): pass\n\nis equivalent to\n\n def func(): pass\n func = f1(arg)(f2(func))\n\nWhen one or more parameters have the form *parameter* ``=``\n*expression*, the function is said to have "default parameter values."\nFor a parameter with a default value, the corresponding argument may\nbe omitted from a call, in which case the parameter\'s default value is\nsubstituted. If a parameter has a default value, all following\nparameters up until the "``*``" must also have a default value ---\nthis is a syntactic restriction that is not expressed by the grammar.\n\n**Default parameter values are evaluated when the function definition\nis executed.** This means that the expression is evaluated once, when\nthe function is defined, and that the same "pre-computed" value is\nused for each call. This is especially important to understand when a\ndefault parameter is a mutable object, such as a list or a dictionary:\nif the function modifies the object (e.g. by appending an item to a\nlist), the default value is in effect modified. This is generally not\nwhat was intended. A way around this is to use ``None`` as the\ndefault, and explicitly test for it in the body of the function, e.g.:\n\n def whats_on_the_telly(penguin=None):\n if penguin is None:\n penguin = []\n penguin.append("property of the zoo")\n return penguin\n\nFunction call semantics are described in more detail in section\n*Calls*. A function call always assigns values to all parameters\nmentioned in the parameter list, either from position arguments, from\nkeyword arguments, or from default values. If the form\n"``*identifier``" is present, it is initialized to a tuple receiving\nany excess positional parameters, defaulting to the empty tuple. If\nthe form "``**identifier``" is present, it is initialized to a new\ndictionary receiving any excess keyword arguments, defaulting to a new\nempty dictionary. 
Parameters after "``*``" or "``*identifier``" are\nkeyword-only parameters and may only be passed by keyword arguments.\n\nParameters may have annotations of the form "``: expression``"\nfollowing the parameter name. Any parameter may have an annotation\neven those of the form ``*identifier`` or ``**identifier``. Functions\nmay have "return" annotation of the form "``-> expression``" after the\nparameter list. These annotations can be any valid Python expression\nand are evaluated when the function definition is executed.\nAnnotations may be evaluated in a different order than they appear in\nthe source code. The presence of annotations does not change the\nsemantics of a function. The annotation values are available as\nvalues of a dictionary keyed by the parameters\' names in the\n``__annotations__`` attribute of the function object.\n\nIt is also possible to create anonymous functions (functions not bound\nto a name), for immediate use in expressions. This uses lambda forms,\ndescribed in section *Lambdas*. Note that the lambda form is merely a\nshorthand for a simplified function definition; a function defined in\na "``def``" statement can be passed around or assigned to another name\njust like a function defined by a lambda form. The "``def``" form is\nactually more powerful since it allows the execution of multiple\nstatements and annotations.\n\n**Programmer\'s note:** Functions are first-class objects. A "``def``"\nform executed inside a function definition defines a local function\nthat can be returned or passed around. Free variables used in the\nnested function can access the local variables of the function\ncontaining the def. See section *Naming and binding* for details.\n\nSee also:\n\n **PEP 3107** - Function Annotations\n The original specification for function annotations.\n',
'global': '\nThe ``global`` statement\n************************\n\n global_stmt ::= "global" identifier ("," identifier)*\n\nThe ``global`` statement is a declaration which holds for the entire\ncurrent code block. It means that the listed identifiers are to be\ninterpreted as globals. It would be impossible to assign to a global\nvariable without ``global``, although free variables may refer to\nglobals without being declared global.\n\nNames listed in a ``global`` statement must not be used in the same\ncode block textually preceding that ``global`` statement.\n\nNames listed in a ``global`` statement must not be defined as formal\nparameters or in a ``for`` loop control target, ``class`` definition,\nfunction definition, or ``import`` statement.\n\n**CPython implementation detail:** The current implementation does not\nenforce the latter two restrictions, but programs should not abuse\nthis freedom, as future implementations may enforce them or silently\nchange the meaning of the program.\n\n**Programmer\'s note:** the ``global`` is a directive to the parser.\nIt applies only to code parsed at the same time as the ``global``\nstatement. In particular, a ``global`` statement contained in a string\nor code object supplied to the built-in ``exec()`` function does not\naffect the code block *containing* the function call, and code\ncontained in such a string is unaffected by ``global`` statements in\nthe code containing the function call. The same applies to the\n``eval()`` and ``compile()`` functions.\n',
'id-classes': '\nReserved classes of identifiers\n*******************************\n\nCertain classes of identifiers (besides keywords) have special\nmeanings. These classes are identified by the patterns of leading and\ntrailing underscore characters:\n\n``_*``\n Not imported by ``from module import *``. The special identifier\n ``_`` is used in the interactive interpreter to store the result of\n the last evaluation; it is stored in the ``builtins`` module. When\n not in interactive mode, ``_`` has no special meaning and is not\n defined. See section *The import statement*.\n\n Note: The name ``_`` is often used in conjunction with\n internationalization; refer to the documentation for the\n ``gettext`` module for more information on this convention.\n\n``__*__``\n System-defined names. These names are defined by the interpreter\n and its implementation (including the standard library). Current\n system names are discussed in the *Special method names* section\n and elsewhere. More will likely be defined in future versions of\n Python. *Any* use of ``__*__`` names, in any context, that does\n not follow explicitly documented use, is subject to breakage\n without warning.\n\n``__*``\n Class-private names. Names in this category, when used within the\n context of a class definition, are re-written to use a mangled form\n to help avoid name clashes between "private" attributes of base and\n derived classes. See section *Identifiers (Names)*.\n',
'identifiers': '\nIdentifiers and keywords\n************************\n\nIdentifiers (also referred to as *names*) are described by the\nfollowing lexical definitions.\n\nThe syntax of identifiers in Python is based on the Unicode standard\nannex UAX-31, with elaboration and changes as defined below; see also\n**PEP 3131** for further details.\n\nWithin the ASCII range (U+0001..U+007F), the valid characters for\nidentifiers are the same as in Python 2.x: the uppercase and lowercase\nletters ``A`` through ``Z``, the underscore ``_`` and, except for the\nfirst character, the digits ``0`` through ``9``.\n\nPython 3.0 introduces additional characters from outside the ASCII\nrange (see **PEP 3131**). For these characters, the classification\nuses the version of the Unicode Character Database as included in the\n``unicodedata`` module.\n\nIdentifiers are unlimited in length. Case is significant.\n\n identifier ::= xid_start xid_continue*\n id_start ::= <all characters in general categories Lu, Ll, Lt, Lm, Lo, Nl, the underscore, and characters with the Other_ID_Start property>\n id_continue ::= <all characters in id_start, plus characters in the categories Mn, Mc, Nd, Pc and others with the Other_ID_Continue property>\n xid_start ::= <all characters in id_start whose NFKC normalization is in "id_start xid_continue*">\n xid_continue ::= <all characters in id_continue whose NFKC normalization is in "id_continue*">\n\nThe Unicode category codes mentioned above stand for:\n\n* *Lu* - uppercase letters\n\n* *Ll* - lowercase letters\n\n* *Lt* - titlecase letters\n\n* *Lm* - modifier letters\n\n* *Lo* - other letters\n\n* *Nl* - letter numbers\n\n* *Mn* - nonspacing marks\n\n* *Mc* - spacing combining marks\n\n* *Nd* - decimal numbers\n\n* *Pc* - connector punctuations\n\n* *Other_ID_Start* - explicit list of characters in PropList.txt to\n support backwards compatibility\n\n* *Other_ID_Continue* - likewise\n\nAll identifiers are converted into the normal form NFKC while 
parsing;\ncomparison of identifiers is based on NFKC.\n\nA non-normative HTML file listing all valid identifier characters for\nUnicode 4.1 can be found at http://www.dcl.hpi.uni-\npotsdam.de/home/loewis/table-3131.html.\n\n\nKeywords\n========\n\nThe following identifiers are used as reserved words, or *keywords* of\nthe language, and cannot be used as ordinary identifiers. They must\nbe spelled exactly as written here:\n\n False class finally is return\n None continue for lambda try\n True def from nonlocal while\n and del global not with\n as elif if or yield\n assert else import pass\n break except in raise\n\n\nReserved classes of identifiers\n===============================\n\nCertain classes of identifiers (besides keywords) have special\nmeanings. These classes are identified by the patterns of leading and\ntrailing underscore characters:\n\n``_*``\n Not imported by ``from module import *``. The special identifier\n ``_`` is used in the interactive interpreter to store the result of\n the last evaluation; it is stored in the ``builtins`` module. When\n not in interactive mode, ``_`` has no special meaning and is not\n defined. See section *The import statement*.\n\n Note: The name ``_`` is often used in conjunction with\n internationalization; refer to the documentation for the\n ``gettext`` module for more information on this convention.\n\n``__*__``\n System-defined names. These names are defined by the interpreter\n and its implementation (including the standard library). Current\n system names are discussed in the *Special method names* section\n and elsewhere. More will likely be defined in future versions of\n Python. *Any* use of ``__*__`` names, in any context, that does\n not follow explicitly documented use, is subject to breakage\n without warning.\n\n``__*``\n Class-private names. 
Names in this category, when used within the\n context of a class definition, are re-written to use a mangled form\n to help avoid name clashes between "private" attributes of base and\n derived classes. See section *Identifiers (Names)*.\n',
'if': '\nThe ``if`` statement\n********************\n\nThe ``if`` statement is used for conditional execution:\n\n if_stmt ::= "if" expression ":" suite\n ( "elif" expression ":" suite )*\n ["else" ":" suite]\n\nIt selects exactly one of the suites by evaluating the expressions one\nby one until one is found to be true (see section *Boolean operations*\nfor the definition of true and false); then that suite is executed\n(and no other part of the ``if`` statement is executed or evaluated).\nIf all expressions are false, the suite of the ``else`` clause, if\npresent, is executed.\n',
'imaginary': '\nImaginary literals\n******************\n\nImaginary literals are described by the following lexical definitions:\n\n imagnumber ::= (floatnumber | intpart) ("j" | "J")\n\nAn imaginary literal yields a complex number with a real part of 0.0.\nComplex numbers are represented as a pair of floating point numbers\nand have the same restrictions on their range. To create a complex\nnumber with a nonzero real part, add a floating point number to it,\ne.g., ``(3+4j)``. Some examples of imaginary literals:\n\n 3.14j 10.j 10j .001j 1e100j 3.14e-10j\n',
- 'import': '\nThe ``import`` statement\n************************\n\n import_stmt ::= "import" module ["as" name] ( "," module ["as" name] )*\n | "from" relative_module "import" identifier ["as" name]\n ( "," identifier ["as" name] )*\n | "from" relative_module "import" "(" identifier ["as" name]\n ( "," identifier ["as" name] )* [","] ")"\n | "from" module "import" "*"\n module ::= (identifier ".")* identifier\n relative_module ::= "."* module | "."+\n name ::= identifier\n\nImport statements are executed in two steps: (1) find a module, and\ninitialize it if necessary; (2) define a name or names in the local\nnamespace (of the scope where the ``import`` statement occurs). The\nstatement comes in two forms differing on whether it uses the ``from``\nkeyword. The first form (without ``from``) repeats these steps for\neach identifier in the list. The form with ``from`` performs step (1)\nonce, and then performs step (2) repeatedly. For a reference\nimplementation of step (1), see the ``importlib`` module.\n\nTo understand how step (1) occurs, one must first understand how\nPython handles hierarchical naming of modules. To help organize\nmodules and provide a hierarchy in naming, Python has a concept of\npackages. A package can contain other packages and modules while\nmodules cannot contain other modules or packages. From a file system\nperspective, packages are directories and modules are files. The\noriginal specification for packages is still available to read,\nalthough minor details have changed since the writing of that\ndocument.\n\nOnce the name of the module is known (unless otherwise specified, the\nterm "module" will refer to both packages and modules), searching for\nthe module or package can begin. The first place checked is\n``sys.modules``, the cache of all modules that have been imported\npreviously. 
If the module is found there then it is used in step (2)\nof import unless ``None`` is found in ``sys.modules``, in which case\n``ImportError`` is raised.\n\nIf the module is not found in the cache, then ``sys.meta_path`` is\nsearched (the specification for ``sys.meta_path`` can be found in\n**PEP 302**). The object is a list of *finder* objects which are\nqueried in order as to whether they know how to load the module by\ncalling their ``find_module()`` method with the name of the module. If\nthe module happens to be contained within a package (as denoted by the\nexistence of a dot in the name), then a second argument to\n``find_module()`` is given as the value of the ``__path__`` attribute\nfrom the parent package (everything up to the last dot in the name of\nthe module being imported). If a finder can find the module it returns\na *loader* (discussed later) or returns ``None``.\n\nIf none of the finders on ``sys.meta_path`` are able to find the\nmodule then some implicitly defined finders are queried.\nImplementations of Python vary in what implicit meta path finders are\ndefined. The one they all do define, though, is one that handles\n``sys.path_hooks``, ``sys.path_importer_cache``, and ``sys.path``.\n\nThe implicit finder searches for the requested module in the "paths"\nspecified in one of two places ("paths" do not have to be file system\npaths). If the module being imported is supposed to be contained\nwithin a package then the second argument passed to ``find_module()``,\n``__path__`` on the parent package, is used as the source of paths. If\nthe module is not contained in a package then ``sys.path`` is used as\nthe source of paths.\n\nOnce the source of paths is chosen it is iterated over to find a\nfinder that can handle that path. The dict at\n``sys.path_importer_cache`` caches finders for paths and is checked\nfor a finder. 
If the path does not have a finder cached then\n``sys.path_hooks`` is searched by calling each object in the list with\na single argument of the path, returning a finder or raises\n``ImportError``. If a finder is returned then it is cached in\n``sys.path_importer_cache`` and then used for that path entry. If no\nfinder can be found but the path exists then a value of ``None`` is\nstored in ``sys.path_importer_cache`` to signify that an implicit,\nfile-based finder that handles modules stored as individual files\nshould be used for that path. If the path does not exist then a finder\nwhich always returns ``None`` is placed in the cache for the path.\n\nIf no finder can find the module then ``ImportError`` is raised.\nOtherwise some finder returned a loader whose ``load_module()`` method\nis called with the name of the module to load (see **PEP 302** for the\noriginal definition of loaders). A loader has several responsibilities\nto perform on a module it loads. First, if the module already exists\nin ``sys.modules`` (a possibility if the loader is called outside of\nthe import machinery) then it is to use that module for initialization\nand not a new module. But if the module does not exist in\n``sys.modules`` then it is to be added to that dict before\ninitialization begins. If an error occurs during loading of the module\nand it was added to ``sys.modules`` it is to be removed from the dict.\nIf an error occurs but the module was already in ``sys.modules`` it is\nleft in the dict.\n\nThe loader must set several attributes on the module. ``__name__`` is\nto be set to the name of the module. ``__file__`` is to be the "path"\nto the file unless the module is built-in (and thus listed in\n``sys.builtin_module_names``) in which case the attribute is not set.\nIf what is being imported is a package then ``__path__`` is to be set\nto a list of paths to be searched when looking for modules and\npackages contained within the package being imported. 
``__package__``\nis optional but should be set to the name of package that contains the\nmodule or package (the empty string is used for module not contained\nin a package). ``__loader__`` is also optional but should be set to\nthe loader object that is loading the module.\n\nIf an error occurs during loading then the loader raises\n``ImportError`` if some other exception is not already being\npropagated. Otherwise the loader returns the module that was loaded\nand initialized.\n\nWhen step (1) finishes without raising an exception, step (2) can\nbegin.\n\nThe first form of ``import`` statement binds the module name in the\nlocal namespace to the module object, and then goes on to import the\nnext identifier, if any. If the module name is followed by ``as``,\nthe name following ``as`` is used as the local name for the module.\n\nThe ``from`` form does not bind the module name: it goes through the\nlist of identifiers, looks each one of them up in the module found in\nstep (1), and binds the name in the local namespace to the object thus\nfound. As with the first form of ``import``, an alternate local name\ncan be supplied by specifying "``as`` localname". If a name is not\nfound, ``ImportError`` is raised. If the list of identifiers is\nreplaced by a star (``\'*\'``), all public names defined in the module\nare bound in the local namespace of the ``import`` statement.\n\nThe *public names* defined by a module are determined by checking the\nmodule\'s namespace for a variable named ``__all__``; if defined, it\nmust be a sequence of strings which are names defined or imported by\nthat module. The names given in ``__all__`` are all considered public\nand are required to exist. If ``__all__`` is not defined, the set of\npublic names includes all names found in the module\'s namespace which\ndo not begin with an underscore character (``\'_\'``). ``__all__``\nshould contain the entire public API. 
It is intended to avoid\naccidentally exporting items that are not part of the API (such as\nlibrary modules which were imported and used within the module).\n\nThe ``from`` form with ``*`` may only occur in a module scope. The\nwild card form of import --- ``import *`` --- is only allowed at the\nmodule level. Attempting to use it in class or function definitions\nwill raise a ``SyntaxError``.\n\nWhen specifying what module to import you do not have to specify the\nabsolute name of the module. When a module or package is contained\nwithin another package it is possible to make a relative import within\nthe same top package without having to mention the package name. By\nusing leading dots in the specified module or package after ``from``\nyou can specify how high to traverse up the current package hierarchy\nwithout specifying exact names. One leading dot means the current\npackage where the module making the import exists. Two dots means up\none package level. Three dots is up two levels, etc. So if you execute\n``from . import mod`` from a module in the ``pkg`` package then you\nwill end up importing ``pkg.mod``. If you execute ``from ..subpkg2\nimport mod`` from within ``pkg.subpkg1`` you will import\n``pkg.subpkg2.mod``. The specification for relative imports is\ncontained within **PEP 328**.\n\n``importlib.import_module()`` is provided to support applications that\ndetermine which modules need to be loaded dynamically.\n\n\nFuture statements\n=================\n\nA *future statement* is a directive to the compiler that a particular\nmodule should be compiled using syntax or semantics that will be\navailable in a specified future release of Python. The future\nstatement is intended to ease migration to future versions of Python\nthat introduce incompatible changes to the language. 
It allows use of\nthe new features on a per-module basis before the release in which the\nfeature becomes standard.\n\n future_statement ::= "from" "__future__" "import" feature ["as" name]\n ("," feature ["as" name])*\n | "from" "__future__" "import" "(" feature ["as" name]\n ("," feature ["as" name])* [","] ")"\n feature ::= identifier\n name ::= identifier\n\nA future statement must appear near the top of the module. The only\nlines that can appear before a future statement are:\n\n* the module docstring (if any),\n\n* comments,\n\n* blank lines, and\n\n* other future statements.\n\nThe features recognized by Python 3.0 are ``absolute_import``,\n``division``, ``generators``, ``unicode_literals``,\n``print_function``, ``nested_scopes`` and ``with_statement``. They\nare all redundant because they are always enabled, and only kept for\nbackwards compatibility.\n\nA future statement is recognized and treated specially at compile\ntime: Changes to the semantics of core constructs are often\nimplemented by generating different code. It may even be the case\nthat a new feature introduces new incompatible syntax (such as a new\nreserved word), in which case the compiler may need to parse the\nmodule differently. 
Such decisions cannot be pushed off until\nruntime.\n\nFor any given release, the compiler knows which feature names have\nbeen defined, and raises a compile-time error if a future statement\ncontains a feature not known to it.\n\nThe direct runtime semantics are the same as for any import statement:\nthere is a standard module ``__future__``, described later, and it\nwill be imported in the usual way at the time the future statement is\nexecuted.\n\nThe interesting runtime semantics depend on the specific feature\nenabled by the future statement.\n\nNote that there is nothing special about the statement:\n\n import __future__ [as name]\n\nThat is not a future statement; it\'s an ordinary import statement with\nno special semantics or syntax restrictions.\n\nCode compiled by calls to the built-in functions ``exec()`` and\n``compile()`` that occur in a module ``M`` containing a future\nstatement will, by default, use the new syntax or semantics associated\nwith the future statement. This can be controlled by optional\narguments to ``compile()`` --- see the documentation of that function\nfor details.\n\nA future statement typed at an interactive interpreter prompt will\ntake effect for the rest of the interpreter session. If an\ninterpreter is started with the *-i* option, is passed a script name\nto execute, and the script includes a future statement, it will be in\neffect in the interactive session started after the script is\nexecuted.\n\nSee also:\n\n **PEP 236** - Back to the __future__\n The original proposal for the __future__ mechanism.\n',
+ 'import': '\nThe ``import`` statement\n************************\n\n import_stmt ::= "import" module ["as" name] ( "," module ["as" name] )*\n | "from" relative_module "import" identifier ["as" name]\n ( "," identifier ["as" name] )*\n | "from" relative_module "import" "(" identifier ["as" name]\n ( "," identifier ["as" name] )* [","] ")"\n | "from" module "import" "*"\n module ::= (identifier ".")* identifier\n relative_module ::= "."* module | "."+\n name ::= identifier\n\nThe basic import statement (no ``from`` clause) is executed in two\nsteps:\n\n1. find a module, loading and initializing it if necessary\n\n2. define a name or names in the local namespace for the scope where\n the ``import`` statement occurs.\n\nWhen the statement contains multiple clauses (separated by commas) the\ntwo steps are carried out separately for each clause, just as though\nthe clauses had been separated out into individual import statements.\n\nThe details of the first step, finding and loading modules, are\ndescribed in greater detail in the section on the *import system*,\nwhich also describes the various types of packages and modules that\ncan be imported, as well as all the hooks that can be used to\ncustomize the import system. 
Note that failures in this step may\nindicate either that the module could not be located, *or* that an\nerror occurred while initializing the module, which includes execution\nof the module\'s code.\n\nIf the requested module is retrieved successfully, it will be made\navailable in the local namespace in one of three ways:\n\n* If the module name is followed by ``as``, then the name following\n ``as`` is bound directly to the imported module.\n\n* If no other name is specified, and the module being imported is a\n top level module, the module\'s name is bound in the local namespace\n as a reference to the imported module\n\n* If the module being imported is *not* a top level module, then the\n name of the top level package that contains the module is bound in\n the local namespace as a reference to the top level package. The\n imported module must be accessed using its full qualified name\n rather than directly\n\nThe ``from`` form uses a slightly more complex process:\n\n1. find the module specified in the ``from`` clause loading and\n initializing it if necessary;\n\n2. for each of the identifiers specified in the ``import`` clauses:\n\n 1. check if the imported module has an attribute by that name\n\n 2. if not, attempt to import a submodule with that name and then\n check the imported module again for that attribute\n\n 3. if the attribute is not found, ``ImportError`` is raised.\n\n 4. 
otherwise, a reference to that value is bound in the local\n namespace, using the name in the ``as`` clause if it is present,\n otherwise using the attribute name\n\nExamples:\n\n import foo # foo imported and bound locally\n import foo.bar.baz # foo.bar.baz imported, foo bound locally\n import foo.bar.baz as fbb # foo.bar.baz imported and bound as fbb\n from foo.bar import baz # foo.bar.baz imported and bound as baz\n from foo import attr # foo imported and foo.attr bound as attr\n\nIf the list of identifiers is replaced by a star (``\'*\'``), all public\nnames defined in the module are bound in the local namespace for the\nscope where the ``import`` statement occurs.\n\nThe *public names* defined by a module are determined by checking the\nmodule\'s namespace for a variable named ``__all__``; if defined, it\nmust be a sequence of strings which are names defined or imported by\nthat module. The names given in ``__all__`` are all considered public\nand are required to exist. If ``__all__`` is not defined, the set of\npublic names includes all names found in the module\'s namespace which\ndo not begin with an underscore character (``\'_\'``). ``__all__``\nshould contain the entire public API. It is intended to avoid\naccidentally exporting items that are not part of the API (such as\nlibrary modules which were imported and used within the module).\n\nThe ``from`` form with ``*`` may only occur in a module scope.\nAttempting to use it in class or function definitions will raise a\n``SyntaxError``.\n\nThe *public names* defined by a module are determined by checking the\nmodule\'s namespace for a variable named ``__all__``; if defined, it\nmust be a sequence of strings which are names defined or imported by\nthat module. The names given in ``__all__`` are all considered public\nand are required to exist. 
If ``__all__`` is not defined, the set of\npublic names includes all names found in the module\'s namespace which\ndo not begin with an underscore character (``\'_\'``). ``__all__``\nshould contain the entire public API. It is intended to avoid\naccidentally exporting items that are not part of the API (such as\nlibrary modules which were imported and used within the module).\n\nThe ``from`` form with ``*`` may only occur in a module scope. The\nwild card form of import --- ``import *`` --- is only allowed at the\nmodule level. Attempting to use it in class or function definitions\nwill raise a ``SyntaxError``.\n\nWhen specifying what module to import you do not have to specify the\nabsolute name of the module. When a module or package is contained\nwithin another package it is possible to make a relative import within\nthe same top package without having to mention the package name. By\nusing leading dots in the specified module or package after ``from``\nyou can specify how high to traverse up the current package hierarchy\nwithout specifying exact names. One leading dot means the current\npackage where the module making the import exists. Two dots means up\none package level. Three dots is up two levels, etc. So if you execute\n``from . import mod`` from a module in the ``pkg`` package then you\nwill end up importing ``pkg.mod``. If you execute ``from ..subpkg2\nimport mod`` from within ``pkg.subpkg1`` you will import\n``pkg.subpkg2.mod``. The specification for relative imports is\ncontained within **PEP 328**.\n\n``importlib.import_module()`` is provided to support applications that\ndetermine which modules need to be loaded dynamically.\n\n\nFuture statements\n=================\n\nA *future statement* is a directive to the compiler that a particular\nmodule should be compiled using syntax or semantics that will be\navailable in a specified future release of Python. 
The future\nstatement is intended to ease migration to future versions of Python\nthat introduce incompatible changes to the language. It allows use of\nthe new features on a per-module basis before the release in which the\nfeature becomes standard.\n\n future_statement ::= "from" "__future__" "import" feature ["as" name]\n ("," feature ["as" name])*\n | "from" "__future__" "import" "(" feature ["as" name]\n ("," feature ["as" name])* [","] ")"\n feature ::= identifier\n name ::= identifier\n\nA future statement must appear near the top of the module. The only\nlines that can appear before a future statement are:\n\n* the module docstring (if any),\n\n* comments,\n\n* blank lines, and\n\n* other future statements.\n\nThe features recognized by Python 3.0 are ``absolute_import``,\n``division``, ``generators``, ``unicode_literals``,\n``print_function``, ``nested_scopes`` and ``with_statement``. They\nare all redundant because they are always enabled, and only kept for\nbackwards compatibility.\n\nA future statement is recognized and treated specially at compile\ntime: Changes to the semantics of core constructs are often\nimplemented by generating different code. It may even be the case\nthat a new feature introduces new incompatible syntax (such as a new\nreserved word), in which case the compiler may need to parse the\nmodule differently. 
Such decisions cannot be pushed off until\nruntime.\n\nFor any given release, the compiler knows which feature names have\nbeen defined, and raises a compile-time error if a future statement\ncontains a feature not known to it.\n\nThe direct runtime semantics are the same as for any import statement:\nthere is a standard module ``__future__``, described later, and it\nwill be imported in the usual way at the time the future statement is\nexecuted.\n\nThe interesting runtime semantics depend on the specific feature\nenabled by the future statement.\n\nNote that there is nothing special about the statement:\n\n import __future__ [as name]\n\nThat is not a future statement; it\'s an ordinary import statement with\nno special semantics or syntax restrictions.\n\nCode compiled by calls to the built-in functions ``exec()`` and\n``compile()`` that occur in a module ``M`` containing a future\nstatement will, by default, use the new syntax or semantics associated\nwith the future statement. This can be controlled by optional\narguments to ``compile()`` --- see the documentation of that function\nfor details.\n\nA future statement typed at an interactive interpreter prompt will\ntake effect for the rest of the interpreter session. If an\ninterpreter is started with the *-i* option, is passed a script name\nto execute, and the script includes a future statement, it will be in\neffect in the interactive session started after the script is\nexecuted.\n\nSee also:\n\n **PEP 236** - Back to the __future__\n The original proposal for the __future__ mechanism.\n',
'in': '\nComparisons\n***********\n\nUnlike C, all comparison operations in Python have the same priority,\nwhich is lower than that of any arithmetic, shifting or bitwise\noperation. Also unlike C, expressions like ``a < b < c`` have the\ninterpretation that is conventional in mathematics:\n\n comparison ::= or_expr ( comp_operator or_expr )*\n comp_operator ::= "<" | ">" | "==" | ">=" | "<=" | "!="\n | "is" ["not"] | ["not"] "in"\n\nComparisons yield boolean values: ``True`` or ``False``.\n\nComparisons can be chained arbitrarily, e.g., ``x < y <= z`` is\nequivalent to ``x < y and y <= z``, except that ``y`` is evaluated\nonly once (but in both cases ``z`` is not evaluated at all when ``x <\ny`` is found to be false).\n\nFormally, if *a*, *b*, *c*, ..., *y*, *z* are expressions and *op1*,\n*op2*, ..., *opN* are comparison operators, then ``a op1 b op2 c ... y\nopN z`` is equivalent to ``a op1 b and b op2 c and ... y opN z``,\nexcept that each expression is evaluated at most once.\n\nNote that ``a op1 b op2 c`` doesn\'t imply any kind of comparison\nbetween *a* and *c*, so that, e.g., ``x < y > z`` is perfectly legal\n(though perhaps not pretty).\n\nThe operators ``<``, ``>``, ``==``, ``>=``, ``<=``, and ``!=`` compare\nthe values of two objects. The objects need not have the same type.\nIf both are numbers, they are converted to a common type. Otherwise,\nthe ``==`` and ``!=`` operators *always* consider objects of different\ntypes to be unequal, while the ``<``, ``>``, ``>=`` and ``<=``\noperators raise a ``TypeError`` when comparing objects of different\ntypes that do not implement these operators for the given pair of\ntypes. 
You can control comparison behavior of objects of non-built-in\ntypes by defining rich comparison methods like ``__gt__()``, described\nin section *Basic customization*.\n\nComparison of objects of the same type depends on the type:\n\n* Numbers are compared arithmetically.\n\n* The values ``float(\'NaN\')`` and ``Decimal(\'NaN\')`` are special. They\n are identical to themselves, ``x is x`` but are not equal to\n themselves, ``x != x``. Additionally, comparing any value to a\n not-a-number value will return ``False``. For example, both ``3 <\n float(\'NaN\')`` and ``float(\'NaN\') < 3`` will return ``False``.\n\n* Bytes objects are compared lexicographically using the numeric\n values of their elements.\n\n* Strings are compared lexicographically using the numeric equivalents\n (the result of the built-in function ``ord()``) of their characters.\n [3] String and bytes objects can\'t be compared!\n\n* Tuples and lists are compared lexicographically using comparison of\n corresponding elements. This means that to compare equal, each\n element must compare equal and the two sequences must be of the same\n type and have the same length.\n\n If not equal, the sequences are ordered the same as their first\n differing elements. For example, ``[1,2,x] <= [1,2,y]`` has the\n same value as ``x <= y``. If the corresponding element does not\n exist, the shorter sequence is ordered first (for example, ``[1,2] <\n [1,2,3]``).\n\n* Mappings (dictionaries) compare equal if and only if they have the\n same ``(key, value)`` pairs. Order comparisons ``(\'<\', \'<=\', \'>=\',\n \'>\')`` raise ``TypeError``.\n\n* Sets and frozensets define comparison operators to mean subset and\n superset tests. Those relations do not define total orderings (the\n two sets ``{1,2}`` and ``{2,3}`` are not equal, nor subsets of one\n another, nor supersets of one another). 
Accordingly, sets are not\n appropriate arguments for functions which depend on total ordering.\n For example, ``min()``, ``max()``, and ``sorted()`` produce\n undefined results given a list of sets as inputs.\n\n* Most other objects of built-in types compare unequal unless they are\n the same object; the choice whether one object is considered smaller\n or larger than another one is made arbitrarily but consistently\n within one execution of a program.\n\nComparison of objects of differing types depends on whether either\nof the types provide explicit support for the comparison. Most\nnumeric types can be compared with one another, but comparisons of\n``float`` and ``Decimal`` are not supported to avoid the inevitable\nconfusion arising from representation issues such as ``float(\'1.1\')``\nbeing inexactly represented and therefore not exactly equal to\n``Decimal(\'1.1\')`` which is. When cross-type comparison is not\nsupported, the comparison method returns ``NotImplemented``. This can\ncreate the illusion of non-transitivity between supported cross-type\ncomparisons and unsupported comparisons. For example, ``Decimal(2) ==\n2`` and ``2 == float(2)`` but ``Decimal(2) != float(2)``.\n\nThe operators ``in`` and ``not in`` test for membership. ``x in s``\nevaluates to true if *x* is a member of *s*, and false otherwise. ``x\nnot in s`` returns the negation of ``x in s``. All built-in sequences\nand set types support this as well as dictionary, for which ``in``\ntests whether the dictionary has a given key. For container types\nsuch as list, tuple, set, frozenset, dict, or collections.deque, the\nexpression ``x in y`` is equivalent to ``any(x is e or x == e for e in\ny)``.\n\nFor the string and bytes types, ``x in y`` is true if and only if *x*\nis a substring of *y*. 
An equivalent test is ``y.find(x) != -1``.\nEmpty strings are always considered to be a substring of any other\nstring, so ``"" in "abc"`` will return ``True``.\n\nFor user-defined classes which define the ``__contains__()`` method,\n``x in y`` is true if and only if ``y.__contains__(x)`` is true.\n\nFor user-defined classes which do not define ``__contains__()`` but do\ndefine ``__iter__()``, ``x in y`` is true if some value ``z`` with ``x\n== z`` is produced while iterating over ``y``. If an exception is\nraised during the iteration, it is as if ``in`` raised that exception.\n\nLastly, the old-style iteration protocol is tried: if a class defines\n``__getitem__()``, ``x in y`` is true if and only if there is a non-\nnegative integer index *i* such that ``x == y[i]``, and all lower\ninteger indices do not raise ``IndexError`` exception. (If any other\nexception is raised, it is as if ``in`` raised that exception).\n\nThe operator ``not in`` is defined to have the inverse truth value of\n``in``.\n\nThe operators ``is`` and ``is not`` test for object identity: ``x is\ny`` is true if and only if *x* and *y* are the same object. ``x is\nnot y`` yields the inverse truth value. [4]\n',
'integers': '\nInteger literals\n****************\n\nInteger literals are described by the following lexical definitions:\n\n integer ::= decimalinteger | octinteger | hexinteger | bininteger\n decimalinteger ::= nonzerodigit digit* | "0"+\n nonzerodigit ::= "1"..."9"\n digit ::= "0"..."9"\n octinteger ::= "0" ("o" | "O") octdigit+\n hexinteger ::= "0" ("x" | "X") hexdigit+\n bininteger ::= "0" ("b" | "B") bindigit+\n octdigit ::= "0"..."7"\n hexdigit ::= digit | "a"..."f" | "A"..."F"\n bindigit ::= "0" | "1"\n\nThere is no limit for the length of integer literals apart from what\ncan be stored in available memory.\n\nNote that leading zeros in a non-zero decimal number are not allowed.\nThis is for disambiguation with C-style octal literals, which Python\nused before version 3.0.\n\nSome examples of integer literals:\n\n 7 2147483647 0o177 0b100110111\n 3 79228162514264337593543950336 0o377 0x100000000\n 79228162514264337593543950336 0xdeadbeef\n',
'lambda': '\nLambdas\n*******\n\n lambda_form ::= "lambda" [parameter_list]: expression\n lambda_form_nocond ::= "lambda" [parameter_list]: expression_nocond\n\nLambda forms (lambda expressions) have the same syntactic position as\nexpressions. They are a shorthand to create anonymous functions; the\nexpression ``lambda arguments: expression`` yields a function object.\nThe unnamed object behaves like a function object defined with\n\n def <lambda>(arguments):\n return expression\n\nSee section *Function definitions* for the syntax of parameter lists.\nNote that functions created with lambda forms cannot contain\nstatements or annotations.\n',
@@ -49,30 +50,30 @@ topics = {'assert': '\nThe ``assert`` statement\n************************\n\nAss
'nonlocal': '\nThe ``nonlocal`` statement\n**************************\n\n nonlocal_stmt ::= "nonlocal" identifier ("," identifier)*\n\nThe ``nonlocal`` statement causes the listed identifiers to refer to\npreviously bound variables in the nearest enclosing scope. This is\nimportant because the default behavior for binding is to search the\nlocal namespace first. The statement allows encapsulated code to\nrebind variables outside of the local scope besides the global\n(module) scope.\n\nNames listed in a ``nonlocal`` statement, unlike to those listed in a\n``global`` statement, must refer to pre-existing bindings in an\nenclosing scope (the scope in which a new binding should be created\ncannot be determined unambiguously).\n\nNames listed in a ``nonlocal`` statement must not collide with pre-\nexisting bindings in the local scope.\n\nSee also:\n\n **PEP 3104** - Access to Names in Outer Scopes\n The specification for the ``nonlocal`` statement.\n',
'numbers': "\nNumeric literals\n****************\n\nThere are three types of numeric literals: integers, floating point\nnumbers, and imaginary numbers. There are no complex literals\n(complex numbers can be formed by adding a real number and an\nimaginary number).\n\nNote that numeric literals do not include a sign; a phrase like ``-1``\nis actually an expression composed of the unary operator '``-``' and\nthe literal ``1``.\n",
'numeric-types': "\nEmulating numeric types\n***********************\n\nThe following methods can be defined to emulate numeric objects.\nMethods corresponding to operations that are not supported by the\nparticular kind of number implemented (e.g., bitwise operations for\nnon-integral numbers) should be left undefined.\n\nobject.__add__(self, other)\nobject.__sub__(self, other)\nobject.__mul__(self, other)\nobject.__truediv__(self, other)\nobject.__floordiv__(self, other)\nobject.__mod__(self, other)\nobject.__divmod__(self, other)\nobject.__pow__(self, other[, modulo])\nobject.__lshift__(self, other)\nobject.__rshift__(self, other)\nobject.__and__(self, other)\nobject.__xor__(self, other)\nobject.__or__(self, other)\n\n These methods are called to implement the binary arithmetic\n operations (``+``, ``-``, ``*``, ``/``, ``//``, ``%``,\n ``divmod()``, ``pow()``, ``**``, ``<<``, ``>>``, ``&``, ``^``,\n ``|``). For instance, to evaluate the expression ``x + y``, where\n *x* is an instance of a class that has an ``__add__()`` method,\n ``x.__add__(y)`` is called. The ``__divmod__()`` method should be\n the equivalent to using ``__floordiv__()`` and ``__mod__()``; it\n should not be related to ``__truediv__()``. 
Note that\n ``__pow__()`` should be defined to accept an optional third\n argument if the ternary version of the built-in ``pow()`` function\n is to be supported.\n\n If one of those methods does not support the operation with the\n supplied arguments, it should return ``NotImplemented``.\n\nobject.__radd__(self, other)\nobject.__rsub__(self, other)\nobject.__rmul__(self, other)\nobject.__rtruediv__(self, other)\nobject.__rfloordiv__(self, other)\nobject.__rmod__(self, other)\nobject.__rdivmod__(self, other)\nobject.__rpow__(self, other)\nobject.__rlshift__(self, other)\nobject.__rrshift__(self, other)\nobject.__rand__(self, other)\nobject.__rxor__(self, other)\nobject.__ror__(self, other)\n\n These methods are called to implement the binary arithmetic\n operations (``+``, ``-``, ``*``, ``/``, ``//``, ``%``,\n ``divmod()``, ``pow()``, ``**``, ``<<``, ``>>``, ``&``, ``^``,\n ``|``) with reflected (swapped) operands. These functions are only\n called if the left operand does not support the corresponding\n operation and the operands are of different types. [2] For\n instance, to evaluate the expression ``x - y``, where *y* is an\n instance of a class that has an ``__rsub__()`` method,\n ``y.__rsub__(x)`` is called if ``x.__sub__(y)`` returns\n *NotImplemented*.\n\n Note that ternary ``pow()`` will not try calling ``__rpow__()``\n (the coercion rules would become too complicated).\n\n Note: If the right operand's type is a subclass of the left operand's\n type and that subclass provides the reflected method for the\n operation, this method will be called before the left operand's\n non-reflected method. 
This behavior allows subclasses to\n override their ancestors' operations.\n\nobject.__iadd__(self, other)\nobject.__isub__(self, other)\nobject.__imul__(self, other)\nobject.__itruediv__(self, other)\nobject.__ifloordiv__(self, other)\nobject.__imod__(self, other)\nobject.__ipow__(self, other[, modulo])\nobject.__ilshift__(self, other)\nobject.__irshift__(self, other)\nobject.__iand__(self, other)\nobject.__ixor__(self, other)\nobject.__ior__(self, other)\n\n These methods are called to implement the augmented arithmetic\n assignments (``+=``, ``-=``, ``*=``, ``/=``, ``//=``, ``%=``,\n ``**=``, ``<<=``, ``>>=``, ``&=``, ``^=``, ``|=``). These methods\n should attempt to do the operation in-place (modifying *self*) and\n return the result (which could be, but does not have to be,\n *self*). If a specific method is not defined, the augmented\n assignment falls back to the normal methods. For instance, to\n execute the statement ``x += y``, where *x* is an instance of a\n class that has an ``__iadd__()`` method, ``x.__iadd__(y)`` is\n called. If *x* is an instance of a class that does not define a\n ``__iadd__()`` method, ``x.__add__(y)`` and ``y.__radd__(x)`` are\n considered, as with the evaluation of ``x + y``.\n\nobject.__neg__(self)\nobject.__pos__(self)\nobject.__abs__(self)\nobject.__invert__(self)\n\n Called to implement the unary arithmetic operations (``-``, ``+``,\n ``abs()`` and ``~``).\n\nobject.__complex__(self)\nobject.__int__(self)\nobject.__float__(self)\nobject.__round__(self[, n])\n\n Called to implement the built-in functions ``complex()``,\n ``int()``, ``float()`` and ``round()``. Should return a value of\n the appropriate type.\n\nobject.__index__(self)\n\n Called to implement ``operator.index()``. Also called whenever\n Python needs an integer object (such as in slicing, or in the\n built-in ``bin()``, ``hex()`` and ``oct()`` functions). Must return\n an integer.\n",
- 'objects': '\nObjects, values and types\n*************************\n\n*Objects* are Python\'s abstraction for data. All data in a Python\nprogram is represented by objects or by relations between objects. (In\na sense, and in conformance to Von Neumann\'s model of a "stored\nprogram computer," code is also represented by objects.)\n\nEvery object has an identity, a type and a value. An object\'s\n*identity* never changes once it has been created; you may think of it\nas the object\'s address in memory. The \'``is``\' operator compares the\nidentity of two objects; the ``id()`` function returns an integer\nrepresenting its identity (currently implemented as its address). An\nobject\'s *type* is also unchangeable. [1] An object\'s type determines\nthe operations that the object supports (e.g., "does it have a\nlength?") and also defines the possible values for objects of that\ntype. The ``type()`` function returns an object\'s type (which is an\nobject itself). The *value* of some objects can change. Objects\nwhose value can change are said to be *mutable*; objects whose value\nis unchangeable once they are created are called *immutable*. (The\nvalue of an immutable container object that contains a reference to a\nmutable object can change when the latter\'s value is changed; however\nthe container is still considered immutable, because the collection of\nobjects it contains cannot be changed. So, immutability is not\nstrictly the same as having an unchangeable value, it is more subtle.)\nAn object\'s mutability is determined by its type; for instance,\nnumbers, strings and tuples are immutable, while dictionaries and\nlists are mutable.\n\nObjects are never explicitly destroyed; however, when they become\nunreachable they may be garbage-collected. 
An implementation is\nallowed to postpone garbage collection or omit it altogether --- it is\na matter of implementation quality how garbage collection is\nimplemented, as long as no objects are collected that are still\nreachable.\n\n**CPython implementation detail:** CPython currently uses a reference-\ncounting scheme with (optional) delayed detection of cyclically linked\ngarbage, which collects most objects as soon as they become\nunreachable, but is not guaranteed to collect garbage containing\ncircular references. See the documentation of the ``gc`` module for\ninformation on controlling the collection of cyclic garbage. Other\nimplementations act differently and CPython may change. Do not depend\non immediate finalization of objects when they become unreachable (ex:\nalways close files).\n\nNote that the use of the implementation\'s tracing or debugging\nfacilities may keep objects alive that would normally be collectable.\nAlso note that catching an exception with a \'``try``...``except``\'\nstatement may keep objects alive.\n\nSome objects contain references to "external" resources such as open\nfiles or windows. It is understood that these resources are freed\nwhen the object is garbage-collected, but since garbage collection is\nnot guaranteed to happen, such objects also provide an explicit way to\nrelease the external resource, usually a ``close()`` method. Programs\nare strongly recommended to explicitly close such objects. The\n\'``try``...``finally``\' statement and the \'``with``\' statement provide\nconvenient ways to do this.\n\nSome objects contain references to other objects; these are called\n*containers*. Examples of containers are tuples, lists and\ndictionaries. The references are part of a container\'s value. 
In\nmost cases, when we talk about the value of a container, we imply the\nvalues, not the identities of the contained objects; however, when we\ntalk about the mutability of a container, only the identities of the\nimmediately contained objects are implied. So, if an immutable\ncontainer (like a tuple) contains a reference to a mutable object, its\nvalue changes if that mutable object is changed.\n\nTypes affect almost all aspects of object behavior. Even the\nimportance of object identity is affected in some sense: for immutable\ntypes, operations that compute new values may actually return a\nreference to any existing object with the same type and value, while\nfor mutable objects this is not allowed. E.g., after ``a = 1; b =\n1``, ``a`` and ``b`` may or may not refer to the same object with the\nvalue one, depending on the implementation, but after ``c = []; d =\n[]``, ``c`` and ``d`` are guaranteed to refer to two different,\nunique, newly created empty lists. (Note that ``c = d = []`` assigns\nthe same object to both ``c`` and ``d``.)\n',
+ 'objects': '\nObjects, values and types\n*************************\n\n*Objects* are Python\'s abstraction for data. All data in a Python\nprogram is represented by objects or by relations between objects. (In\na sense, and in conformance to Von Neumann\'s model of a "stored\nprogram computer," code is also represented by objects.)\n\nEvery object has an identity, a type and a value. An object\'s\n*identity* never changes once it has been created; you may think of it\nas the object\'s address in memory. The \'``is``\' operator compares the\nidentity of two objects; the ``id()`` function returns an integer\nrepresenting its identity.\n\n**CPython implementation detail:** For CPython, ``id(x)`` is the\nmemory address where ``x`` is stored.\n\nAn object\'s type determines the operations that the object supports\n(e.g., "does it have a length?") and also defines the possible values\nfor objects of that type. The ``type()`` function returns an object\'s\ntype (which is an object itself). Like its identity, an object\'s\n*type* is also unchangeable. [1]\n\nThe *value* of some objects can change. Objects whose value can\nchange are said to be *mutable*; objects whose value is unchangeable\nonce they are created are called *immutable*. (The value of an\nimmutable container object that contains a reference to a mutable\nobject can change when the latter\'s value is changed; however the\ncontainer is still considered immutable, because the collection of\nobjects it contains cannot be changed. So, immutability is not\nstrictly the same as having an unchangeable value, it is more subtle.)\nAn object\'s mutability is determined by its type; for instance,\nnumbers, strings and tuples are immutable, while dictionaries and\nlists are mutable.\n\nObjects are never explicitly destroyed; however, when they become\nunreachable they may be garbage-collected. 
An implementation is\nallowed to postpone garbage collection or omit it altogether --- it is\na matter of implementation quality how garbage collection is\nimplemented, as long as no objects are collected that are still\nreachable.\n\n**CPython implementation detail:** CPython currently uses a reference-\ncounting scheme with (optional) delayed detection of cyclically linked\ngarbage, which collects most objects as soon as they become\nunreachable, but is not guaranteed to collect garbage containing\ncircular references. See the documentation of the ``gc`` module for\ninformation on controlling the collection of cyclic garbage. Other\nimplementations act differently and CPython may change. Do not depend\non immediate finalization of objects when they become unreachable (ex:\nalways close files).\n\nNote that the use of the implementation\'s tracing or debugging\nfacilities may keep objects alive that would normally be collectable.\nAlso note that catching an exception with a \'``try``...``except``\'\nstatement may keep objects alive.\n\nSome objects contain references to "external" resources such as open\nfiles or windows. It is understood that these resources are freed\nwhen the object is garbage-collected, but since garbage collection is\nnot guaranteed to happen, such objects also provide an explicit way to\nrelease the external resource, usually a ``close()`` method. Programs\nare strongly recommended to explicitly close such objects. The\n\'``try``...``finally``\' statement and the \'``with``\' statement provide\nconvenient ways to do this.\n\nSome objects contain references to other objects; these are called\n*containers*. Examples of containers are tuples, lists and\ndictionaries. The references are part of a container\'s value. 
In\nmost cases, when we talk about the value of a container, we imply the\nvalues, not the identities of the contained objects; however, when we\ntalk about the mutability of a container, only the identities of the\nimmediately contained objects are implied. So, if an immutable\ncontainer (like a tuple) contains a reference to a mutable object, its\nvalue changes if that mutable object is changed.\n\nTypes affect almost all aspects of object behavior. Even the\nimportance of object identity is affected in some sense: for immutable\ntypes, operations that compute new values may actually return a\nreference to any existing object with the same type and value, while\nfor mutable objects this is not allowed. E.g., after ``a = 1; b =\n1``, ``a`` and ``b`` may or may not refer to the same object with the\nvalue one, depending on the implementation, but after ``c = []; d =\n[]``, ``c`` and ``d`` are guaranteed to refer to two different,\nunique, newly created empty lists. (Note that ``c = d = []`` assigns\nthe same object to both ``c`` and ``d``.)\n',
'operator-summary': '\nSummary\n*******\n\nThe following table summarizes the operator precedences in Python,\nfrom lowest precedence (least binding) to highest precedence (most\nbinding). Operators in the same box have the same precedence. Unless\nthe syntax is explicitly given, operators are binary. Operators in\nthe same box group left to right (except for comparisons, including\ntests, which all have the same precedence and chain from left to right\n--- see section *Comparisons* --- and exponentiation, which groups\nfrom right to left).\n\n+-------------------------------------------------+---------------------------------------+\n| Operator | Description |\n+=================================================+=======================================+\n| ``lambda`` | Lambda expression |\n+-------------------------------------------------+---------------------------------------+\n| ``if`` -- ``else`` | Conditional expression |\n+-------------------------------------------------+---------------------------------------+\n| ``or`` | Boolean OR |\n+-------------------------------------------------+---------------------------------------+\n| ``and`` | Boolean AND |\n+-------------------------------------------------+---------------------------------------+\n| ``not`` *x* | Boolean NOT |\n+-------------------------------------------------+---------------------------------------+\n| ``in``, ``not`` ``in``, ``is``, ``is not``, | Comparisons, including membership |\n| ``<``, ``<=``, ``>``, ``>=``, ``!=``, ``==`` | tests and identity tests, |\n+-------------------------------------------------+---------------------------------------+\n| ``|`` | Bitwise OR |\n+-------------------------------------------------+---------------------------------------+\n| ``^`` | Bitwise XOR |\n+-------------------------------------------------+---------------------------------------+\n| ``&`` | Bitwise AND 
|\n+-------------------------------------------------+---------------------------------------+\n| ``<<``, ``>>`` | Shifts |\n+-------------------------------------------------+---------------------------------------+\n| ``+``, ``-`` | Addition and subtraction |\n+-------------------------------------------------+---------------------------------------+\n| ``*``, ``/``, ``//``, ``%`` | Multiplication, division, remainder |\n| | [5] |\n+-------------------------------------------------+---------------------------------------+\n| ``+x``, ``-x``, ``~x`` | Positive, negative, bitwise NOT |\n+-------------------------------------------------+---------------------------------------+\n| ``**`` | Exponentiation [6] |\n+-------------------------------------------------+---------------------------------------+\n| ``x[index]``, ``x[index:index]``, | Subscription, slicing, call, |\n| ``x(arguments...)``, ``x.attribute`` | attribute reference |\n+-------------------------------------------------+---------------------------------------+\n| ``(expressions...)``, ``[expressions...]``, | Binding or tuple display, list |\n| ``{key:datum...}``, ``{expressions...}`` | display, dictionary display, set |\n| | display |\n+-------------------------------------------------+---------------------------------------+\n\n-[ Footnotes ]-\n\n[1] While ``abs(x%y) < abs(y)`` is true mathematically, for floats it\n may not be true numerically due to roundoff. For example, and\n assuming a platform on which a Python float is an IEEE 754 double-\n precision number, in order that ``-1e-100 % 1e100`` have the same\n sign as ``1e100``, the computed result is ``-1e-100 + 1e100``,\n which is numerically exactly equal to ``1e100``. The function\n ``math.fmod()`` returns a result whose sign matches the sign of\n the first argument instead, and so returns ``-1e-100`` in this\n case. 
Which approach is more appropriate depends on the\n application.\n\n[2] If x is very close to an exact integer multiple of y, it\'s\n possible for ``x//y`` to be one larger than ``(x-x%y)//y`` due to\n rounding. In such cases, Python returns the latter result, in\n order to preserve that ``divmod(x,y)[0] * y + x % y`` be very\n close to ``x``.\n\n[3] While comparisons between strings make sense at the byte level,\n they may be counter-intuitive to users. For example, the strings\n ``"\\u00C7"`` and ``"\\u0327\\u0043"`` compare differently, even\n though they both represent the same unicode character (LATIN\n CAPITAL LETTER C WITH CEDILLA). To compare strings in a human\n recognizable way, compare using ``unicodedata.normalize()``.\n\n[4] Due to automatic garbage-collection, free lists, and the dynamic\n nature of descriptors, you may notice seemingly unusual behaviour\n in certain uses of the ``is`` operator, like those involving\n comparisons between instance methods, or constants. Check their\n documentation for more info.\n\n[5] The ``%`` operator is also used for string formatting; the same\n precedence applies.\n\n[6] The power operator ``**`` binds less tightly than an arithmetic or\n bitwise unary operator on its right, that is, ``2**-1`` is\n ``0.5``.\n',
'pass': '\nThe ``pass`` statement\n**********************\n\n pass_stmt ::= "pass"\n\n``pass`` is a null operation --- when it is executed, nothing happens.\nIt is useful as a placeholder when a statement is required\nsyntactically, but no code needs to be executed, for example:\n\n def f(arg): pass # a function that does nothing (yet)\n\n class C: pass # a class with no methods (yet)\n',
'power': '\nThe power operator\n******************\n\nThe power operator binds more tightly than unary operators on its\nleft; it binds less tightly than unary operators on its right. The\nsyntax is:\n\n power ::= primary ["**" u_expr]\n\nThus, in an unparenthesized sequence of power and unary operators, the\noperators are evaluated from right to left (this does not constrain\nthe evaluation order for the operands): ``-1**2`` results in ``-1``.\n\nThe power operator has the same semantics as the built-in ``pow()``\nfunction, when called with two arguments: it yields its left argument\nraised to the power of its right argument. The numeric arguments are\nfirst converted to a common type, and the result is of that type.\n\nFor int operands, the result has the same type as the operands unless\nthe second argument is negative; in that case, all arguments are\nconverted to float and a float result is delivered. For example,\n``10**2`` returns ``100``, but ``10**-2`` returns ``0.01``.\n\nRaising ``0.0`` to a negative power results in a\n``ZeroDivisionError``. Raising a negative number to a fractional power\nresults in a ``complex`` number. (In earlier versions it raised a\n``ValueError``.)\n',
'raise': '\nThe ``raise`` statement\n***********************\n\n raise_stmt ::= "raise" [expression ["from" expression]]\n\nIf no expressions are present, ``raise`` re-raises the last exception\nthat was active in the current scope. If no exception is active in\nthe current scope, a ``RuntimeError`` exception is raised indicating\nthat this is an error.\n\nOtherwise, ``raise`` evaluates the first expression as the exception\nobject. It must be either a subclass or an instance of\n``BaseException``. If it is a class, the exception instance will be\nobtained when needed by instantiating the class with no arguments.\n\nThe *type* of the exception is the exception instance\'s class, the\n*value* is the instance itself.\n\nA traceback object is normally created automatically when an exception\nis raised and attached to it as the ``__traceback__`` attribute, which\nis writable. You can create an exception and set your own traceback in\none step using the ``with_traceback()`` exception method (which\nreturns the same exception instance, with its traceback set to its\nargument), like so:\n\n raise Exception("foo occurred").with_traceback(tracebackobj)\n\nThe ``from`` clause is used for exception chaining: if given, the\nsecond *expression* must be another exception class or instance, which\nwill then be attached to the raised exception as the ``__cause__``\nattribute (which is writable). If the raised exception is not\nhandled, both exceptions will be printed:\n\n >>> try:\n ... print(1 / 0)\n ... except Exception as exc:\n ... 
raise RuntimeError("Something bad happened") from exc\n ...\n Traceback (most recent call last):\n File "<stdin>", line 2, in <module>\n ZeroDivisionError: int division or modulo by zero\n\n The above exception was the direct cause of the following exception:\n\n Traceback (most recent call last):\n File "<stdin>", line 4, in <module>\n RuntimeError: Something bad happened\n\nA similar mechanism works implicitly if an exception is raised inside\nan exception handler: the previous exception is then attached as the\nnew exception\'s ``__context__`` attribute:\n\n >>> try:\n ... print(1 / 0)\n ... except:\n ... raise RuntimeError("Something bad happened")\n ...\n Traceback (most recent call last):\n File "<stdin>", line 2, in <module>\n ZeroDivisionError: int division or modulo by zero\n\n During handling of the above exception, another exception occurred:\n\n Traceback (most recent call last):\n File "<stdin>", line 4, in <module>\n RuntimeError: Something bad happened\n\nAdditional information on exceptions can be found in section\n*Exceptions*, and information about handling exceptions is in section\n*The try statement*.\n',
- 'return': '\nThe ``return`` statement\n************************\n\n return_stmt ::= "return" [expression_list]\n\n``return`` may only occur syntactically nested in a function\ndefinition, not within a nested class definition.\n\nIf an expression list is present, it is evaluated, else ``None`` is\nsubstituted.\n\n``return`` leaves the current function call with the expression list\n(or ``None``) as return value.\n\nWhen ``return`` passes control out of a ``try`` statement with a\n``finally`` clause, that ``finally`` clause is executed before really\nleaving the function.\n\nIn a generator function, the ``return`` statement is not allowed to\ninclude an ``expression_list``. In that context, a bare ``return``\nindicates that the generator is done and will cause ``StopIteration``\nto be raised.\n',
+ 'return': '\nThe ``return`` statement\n************************\n\n return_stmt ::= "return" [expression_list]\n\n``return`` may only occur syntactically nested in a function\ndefinition, not within a nested class definition.\n\nIf an expression list is present, it is evaluated, else ``None`` is\nsubstituted.\n\n``return`` leaves the current function call with the expression list\n(or ``None``) as return value.\n\nWhen ``return`` passes control out of a ``try`` statement with a\n``finally`` clause, that ``finally`` clause is executed before really\nleaving the function.\n\nIn a generator function, the ``return`` statement indicates that the\ngenerator is done and will cause ``StopIteration`` to be raised. The\nreturned value (if any) is used as an argument to construct\n``StopIteration`` and becomes the ``StopIteration.value`` attribute.\n',
'sequence-types': "\nEmulating container types\n*************************\n\nThe following methods can be defined to implement container objects.\nContainers usually are sequences (such as lists or tuples) or mappings\n(like dictionaries), but can represent other containers as well. The\nfirst set of methods is used either to emulate a sequence or to\nemulate a mapping; the difference is that for a sequence, the\nallowable keys should be the integers *k* for which ``0 <= k < N``\nwhere *N* is the length of the sequence, or slice objects, which\ndefine a range of items. It is also recommended that mappings provide\nthe methods ``keys()``, ``values()``, ``items()``, ``get()``,\n``clear()``, ``setdefault()``, ``pop()``, ``popitem()``, ``copy()``,\nand ``update()`` behaving similar to those for Python's standard\ndictionary objects. The ``collections`` module provides a\n``MutableMapping`` abstract base class to help create those methods\nfrom a base set of ``__getitem__()``, ``__setitem__()``,\n``__delitem__()``, and ``keys()``. Mutable sequences should provide\nmethods ``append()``, ``count()``, ``index()``, ``extend()``,\n``insert()``, ``pop()``, ``remove()``, ``reverse()`` and ``sort()``,\nlike Python standard list objects. Finally, sequence types should\nimplement addition (meaning concatenation) and multiplication (meaning\nrepetition) by defining the methods ``__add__()``, ``__radd__()``,\n``__iadd__()``, ``__mul__()``, ``__rmul__()`` and ``__imul__()``\ndescribed below; they should not define other numerical operators. It\nis recommended that both mappings and sequences implement the\n``__contains__()`` method to allow efficient use of the ``in``\noperator; for mappings, ``in`` should search the mapping's keys; for\nsequences, it should search through the values. 
It is further\nrecommended that both mappings and sequences implement the\n``__iter__()`` method to allow efficient iteration through the\ncontainer; for mappings, ``__iter__()`` should be the same as\n``keys()``; for sequences, it should iterate through the values.\n\nobject.__len__(self)\n\n Called to implement the built-in function ``len()``. Should return\n the length of the object, an integer ``>=`` 0. Also, an object\n that doesn't define a ``__bool__()`` method and whose ``__len__()``\n method returns zero is considered to be false in a Boolean context.\n\nNote: Slicing is done exclusively with the following three methods. A\n call like\n\n a[1:2] = b\n\n is translated to\n\n a[slice(1, 2, None)] = b\n\n and so forth. Missing slice items are always filled in with\n ``None``.\n\nobject.__getitem__(self, key)\n\n Called to implement evaluation of ``self[key]``. For sequence\n types, the accepted keys should be integers and slice objects.\n Note that the special interpretation of negative indexes (if the\n class wishes to emulate a sequence type) is up to the\n ``__getitem__()`` method. If *key* is of an inappropriate type,\n ``TypeError`` may be raised; if of a value outside the set of\n indexes for the sequence (after any special interpretation of\n negative values), ``IndexError`` should be raised. For mapping\n types, if *key* is missing (not in the container), ``KeyError``\n should be raised.\n\n Note: ``for`` loops expect that an ``IndexError`` will be raised for\n illegal indexes to allow proper detection of the end of the\n sequence.\n\nobject.__setitem__(self, key, value)\n\n Called to implement assignment to ``self[key]``. Same note as for\n ``__getitem__()``. This should only be implemented for mappings if\n the objects support changes to the values for keys, or if new keys\n can be added, or for sequences if elements can be replaced. 
The\n same exceptions should be raised for improper *key* values as for\n the ``__getitem__()`` method.\n\nobject.__delitem__(self, key)\n\n Called to implement deletion of ``self[key]``. Same note as for\n ``__getitem__()``. This should only be implemented for mappings if\n the objects support removal of keys, or for sequences if elements\n can be removed from the sequence. The same exceptions should be\n raised for improper *key* values as for the ``__getitem__()``\n method.\n\nobject.__iter__(self)\n\n This method is called when an iterator is required for a container.\n This method should return a new iterator object that can iterate\n over all the objects in the container. For mappings, it should\n iterate over the keys of the container, and should also be made\n available as the method ``keys()``.\n\n Iterator objects also need to implement this method; they are\n required to return themselves. For more information on iterator\n objects, see *Iterator Types*.\n\nobject.__reversed__(self)\n\n Called (if present) by the ``reversed()`` built-in to implement\n reverse iteration. It should return a new iterator object that\n iterates over all the objects in the container in reverse order.\n\n If the ``__reversed__()`` method is not provided, the\n ``reversed()`` built-in will fall back to using the sequence\n protocol (``__len__()`` and ``__getitem__()``). Objects that\n support the sequence protocol should only provide\n ``__reversed__()`` if they can provide an implementation that is\n more efficient than the one provided by ``reversed()``.\n\nThe membership test operators (``in`` and ``not in``) are normally\nimplemented as an iteration through a sequence. However, container\nobjects can supply the following special method with a more efficient\nimplementation, which also does not require the object be a sequence.\n\nobject.__contains__(self, item)\n\n Called to implement membership test operators. Should return true\n if *item* is in *self*, false otherwise. 
For mapping objects, this\n should consider the keys of the mapping rather than the values or\n the key-item pairs.\n\n For objects that don't define ``__contains__()``, the membership\n test first tries iteration via ``__iter__()``, then the old\n sequence iteration protocol via ``__getitem__()``, see *this\n section in the language reference*.\n",
'shifting': '\nShifting operations\n*******************\n\nThe shifting operations have lower priority than the arithmetic\noperations:\n\n shift_expr ::= a_expr | shift_expr ( "<<" | ">>" ) a_expr\n\nThese operators accept integers as arguments. They shift the first\nargument to the left or right by the number of bits given by the\nsecond argument.\n\nA right shift by *n* bits is defined as division by ``pow(2,n)``. A\nleft shift by *n* bits is defined as multiplication with ``pow(2,n)``.\n\nNote: In the current implementation, the right-hand operand is required to\n be at most ``sys.maxsize``. If the right-hand operand is larger\n than ``sys.maxsize`` an ``OverflowError`` exception is raised.\n',
'slicings': '\nSlicings\n********\n\nA slicing selects a range of items in a sequence object (e.g., a\nstring, tuple or list). Slicings may be used as expressions or as\ntargets in assignment or ``del`` statements. The syntax for a\nslicing:\n\n slicing ::= primary "[" slice_list "]"\n slice_list ::= slice_item ("," slice_item)* [","]\n slice_item ::= expression | proper_slice\n proper_slice ::= [lower_bound] ":" [upper_bound] [ ":" [stride] ]\n lower_bound ::= expression\n upper_bound ::= expression\n stride ::= expression\n\nThere is ambiguity in the formal syntax here: anything that looks like\nan expression list also looks like a slice list, so any subscription\ncan be interpreted as a slicing. Rather than further complicating the\nsyntax, this is disambiguated by defining that in this case the\ninterpretation as a subscription takes priority over the\ninterpretation as a slicing (this is the case if the slice list\ncontains no proper slice).\n\nThe semantics for a slicing are as follows. The primary must evaluate\nto a mapping object, and it is indexed (using the same\n``__getitem__()`` method as normal subscription) with a key that is\nconstructed from the slice list, as follows. If the slice list\ncontains at least one comma, the key is a tuple containing the\nconversion of the slice items; otherwise, the conversion of the lone\nslice item is the key. The conversion of a slice item that is an\nexpression is that expression. The conversion of a proper slice is a\nslice object (see section *The standard type hierarchy*) whose\n``start``, ``stop`` and ``step`` attributes are the values of the\nexpressions given as lower bound, upper bound and stride,\nrespectively, substituting ``None`` for missing expressions.\n',
- 'specialattrs': '\nSpecial Attributes\n******************\n\nThe implementation adds a few special read-only attributes to several\nobject types, where they are relevant. Some of these are not reported\nby the ``dir()`` built-in function.\n\nobject.__dict__\n\n A dictionary or other mapping object used to store an object\'s\n (writable) attributes.\n\ninstance.__class__\n\n The class to which a class instance belongs.\n\nclass.__bases__\n\n The tuple of base classes of a class object.\n\nclass.__name__\n\n The name of the class or type.\n\nclass.__mro__\n\n This attribute is a tuple of classes that are considered when\n looking for base classes during method resolution.\n\nclass.mro()\n\n This method can be overridden by a metaclass to customize the\n method resolution order for its instances. It is called at class\n instantiation, and its result is stored in ``__mro__``.\n\nclass.__subclasses__()\n\n Each class keeps a list of weak references to its immediate\n subclasses. This method returns a list of all those references\n still alive. Example:\n\n >>> int.__subclasses__()\n [<class \'bool\'>]\n\n-[ Footnotes ]-\n\n[1] Additional information on these special methods may be found in\n the Python Reference Manual (*Basic customization*).\n\n[2] As a consequence, the list ``[1, 2]`` is considered equal to\n ``[1.0, 2.0]``, and similarly for tuples.\n\n[3] They must have since the parser can\'t tell the type of the\n operands.\n\n[4] Cased characters are those with general category property being\n one of "Lu" (Letter, uppercase), "Ll" (Letter, lowercase), or "Lt"\n (Letter, titlecase).\n\n[5] To format only a tuple you should therefore provide a singleton\n tuple whose only element is the tuple to be formatted.\n',
- 'specialnames': '\nSpecial method names\n********************\n\nA class can implement certain operations that are invoked by special\nsyntax (such as arithmetic operations or subscripting and slicing) by\ndefining methods with special names. This is Python\'s approach to\n*operator overloading*, allowing classes to define their own behavior\nwith respect to language operators. For instance, if a class defines\na method named ``__getitem__()``, and ``x`` is an instance of this\nclass, then ``x[i]`` is roughly equivalent to ``type(x).__getitem__(x,\ni)``. Except where mentioned, attempts to execute an operation raise\nan exception when no appropriate method is defined (typically\n``AttributeError`` or ``TypeError``).\n\nWhen implementing a class that emulates any built-in type, it is\nimportant that the emulation only be implemented to the degree that it\nmakes sense for the object being modelled. For example, some\nsequences may work well with retrieval of individual elements, but\nextracting a slice may not make sense. (One example of this is the\n``NodeList`` interface in the W3C\'s Document Object Model.)\n\n\nBasic customization\n===================\n\nobject.__new__(cls[, ...])\n\n Called to create a new instance of class *cls*. ``__new__()`` is a\n static method (special-cased so you need not declare it as such)\n that takes the class of which an instance was requested as its\n first argument. The remaining arguments are those passed to the\n object constructor expression (the call to the class). 
The return\n value of ``__new__()`` should be the new object instance (usually\n an instance of *cls*).\n\n Typical implementations create a new instance of the class by\n invoking the superclass\'s ``__new__()`` method using\n ``super(currentclass, cls).__new__(cls[, ...])`` with appropriate\n arguments and then modifying the newly-created instance as\n necessary before returning it.\n\n If ``__new__()`` returns an instance of *cls*, then the new\n instance\'s ``__init__()`` method will be invoked like\n ``__init__(self[, ...])``, where *self* is the new instance and the\n remaining arguments are the same as were passed to ``__new__()``.\n\n If ``__new__()`` does not return an instance of *cls*, then the new\n instance\'s ``__init__()`` method will not be invoked.\n\n ``__new__()`` is intended mainly to allow subclasses of immutable\n types (like int, str, or tuple) to customize instance creation. It\n is also commonly overridden in custom metaclasses in order to\n customize class creation.\n\nobject.__init__(self[, ...])\n\n Called when the instance is created. The arguments are those\n passed to the class constructor expression. If a base class has an\n ``__init__()`` method, the derived class\'s ``__init__()`` method,\n if any, must explicitly call it to ensure proper initialization of\n the base class part of the instance; for example:\n ``BaseClass.__init__(self, [args...])``. As a special constraint\n on constructors, no value may be returned; doing so will cause a\n ``TypeError`` to be raised at runtime.\n\nobject.__del__(self)\n\n Called when the instance is about to be destroyed. This is also\n called a destructor. If a base class has a ``__del__()`` method,\n the derived class\'s ``__del__()`` method, if any, must explicitly\n call it to ensure proper deletion of the base class part of the\n instance. Note that it is possible (though not recommended!) 
for\n the ``__del__()`` method to postpone destruction of the instance by\n creating a new reference to it. It may then be called at a later\n time when this new reference is deleted. It is not guaranteed that\n ``__del__()`` methods are called for objects that still exist when\n the interpreter exits.\n\n Note: ``del x`` doesn\'t directly call ``x.__del__()`` --- the former\n decrements the reference count for ``x`` by one, and the latter\n is only called when ``x``\'s reference count reaches zero. Some\n common situations that may prevent the reference count of an\n object from going to zero include: circular references between\n objects (e.g., a doubly-linked list or a tree data structure with\n parent and child pointers); a reference to the object on the\n stack frame of a function that caught an exception (the traceback\n stored in ``sys.exc_info()[2]`` keeps the stack frame alive); or\n a reference to the object on the stack frame that raised an\n unhandled exception in interactive mode (the traceback stored in\n ``sys.last_traceback`` keeps the stack frame alive). The first\n situation can only be remedied by explicitly breaking the cycles;\n the latter two situations can be resolved by storing ``None`` in\n ``sys.last_traceback``. Circular references which are garbage are\n detected when the optional cycle detector is enabled (it\'s on by\n default), but can only be cleaned up if there are no Python-\n level ``__del__()`` methods involved. 
Refer to the documentation\n for the ``gc`` module for more information about how\n ``__del__()`` methods are handled by the cycle detector,\n particularly the description of the ``garbage`` value.\n\n Warning: Due to the precarious circumstances under which ``__del__()``\n methods are invoked, exceptions that occur during their execution\n are ignored, and a warning is printed to ``sys.stderr`` instead.\n Also, when ``__del__()`` is invoked in response to a module being\n deleted (e.g., when execution of the program is done), other\n globals referenced by the ``__del__()`` method may already have\n been deleted or in the process of being torn down (e.g. the\n import machinery shutting down). For this reason, ``__del__()``\n methods should do the absolute minimum needed to maintain\n external invariants. Starting with version 1.5, Python\n guarantees that globals whose name begins with a single\n underscore are deleted from their module before other globals are\n deleted; if no other references to such globals exist, this may\n help in assuring that imported modules are still available at the\n time when the ``__del__()`` method is called.\n\nobject.__repr__(self)\n\n Called by the ``repr()`` built-in function to compute the\n "official" string representation of an object. If at all possible,\n this should look like a valid Python expression that could be used\n to recreate an object with the same value (given an appropriate\n environment). If this is not possible, a string of the form\n ``<...some useful description...>`` should be returned. The return\n value must be a string object. 
If a class defines ``__repr__()``\n but not ``__str__()``, then ``__repr__()`` is also used when an\n "informal" string representation of instances of that class is\n required.\n\n This is typically used for debugging, so it is important that the\n representation is information-rich and unambiguous.\n\nobject.__str__(self)\n\n Called by the ``str()`` built-in function and by the ``print()``\n function to compute the "informal" string representation of an\n object. This differs from ``__repr__()`` in that it does not have\n to be a valid Python expression: a more convenient or concise\n representation may be used instead. The return value must be a\n string object.\n\nobject.__bytes__(self)\n\n Called by ``bytes()`` to compute a byte-string representation of an\n object. This should return a ``bytes`` object.\n\nobject.__format__(self, format_spec)\n\n Called by the ``format()`` built-in function (and by extension, the\n ``format()`` method of class ``str``) to produce a "formatted"\n string representation of an object. The ``format_spec`` argument is\n a string that contains a description of the formatting options\n desired. The interpretation of the ``format_spec`` argument is up\n to the type implementing ``__format__()``, however most classes\n will either delegate formatting to one of the built-in types, or\n use a similar formatting option syntax.\n\n See *Format Specification Mini-Language* for a description of the\n standard formatting syntax.\n\n The return value must be a string object.\n\nobject.__lt__(self, other)\nobject.__le__(self, other)\nobject.__eq__(self, other)\nobject.__ne__(self, other)\nobject.__gt__(self, other)\nobject.__ge__(self, other)\n\n These are the so-called "rich comparison" methods. 
The\n correspondence between operator symbols and method names is as\n follows: ``x<y`` calls ``x.__lt__(y)``, ``x<=y`` calls\n ``x.__le__(y)``, ``x==y`` calls ``x.__eq__(y)``, ``x!=y`` calls\n ``x.__ne__(y)``, ``x>y`` calls ``x.__gt__(y)``, and ``x>=y`` calls\n ``x.__ge__(y)``.\n\n A rich comparison method may return the singleton\n ``NotImplemented`` if it does not implement the operation for a\n given pair of arguments. By convention, ``False`` and ``True`` are\n returned for a successful comparison. However, these methods can\n return any value, so if the comparison operator is used in a\n Boolean context (e.g., in the condition of an ``if`` statement),\n Python will call ``bool()`` on the value to determine if the result\n is true or false.\n\n There are no implied relationships among the comparison operators.\n The truth of ``x==y`` does not imply that ``x!=y`` is false.\n Accordingly, when defining ``__eq__()``, one should also define\n ``__ne__()`` so that the operators will behave as expected. See\n the paragraph on ``__hash__()`` for some important notes on\n creating *hashable* objects which support custom comparison\n operations and are usable as dictionary keys.\n\n There are no swapped-argument versions of these methods (to be used\n when the left argument does not support the operation but the right\n argument does); rather, ``__lt__()`` and ``__gt__()`` are each\n other\'s reflection, ``__le__()`` and ``__ge__()`` are each other\'s\n reflection, and ``__eq__()`` and ``__ne__()`` are their own\n reflection.\n\n Arguments to rich comparison methods are never coerced.\n\n To automatically generate ordering operations from a single root\n operation, see ``functools.total_ordering()``.\n\nobject.__hash__(self)\n\n Called by built-in function ``hash()`` and for operations on\n members of hashed collections including ``set``, ``frozenset``, and\n ``dict``. ``__hash__()`` should return an integer. 
The only\n required property is that objects which compare equal have the same\n hash value; it is advised to somehow mix together (e.g. using\n exclusive or) the hash values for the components of the object that\n also play a part in comparison of objects.\n\n If a class does not define an ``__eq__()`` method it should not\n define a ``__hash__()`` operation either; if it defines\n ``__eq__()`` but not ``__hash__()``, its instances will not be\n usable as items in hashable collections. If a class defines\n mutable objects and implements an ``__eq__()`` method, it should\n not implement ``__hash__()``, since the implementation of hashable\n collections requires that a key\'s hash value is immutable (if the\n object\'s hash value changes, it will be in the wrong hash bucket).\n\n User-defined classes have ``__eq__()`` and ``__hash__()`` methods\n by default; with them, all objects compare unequal (except with\n themselves) and ``x.__hash__()`` returns ``id(x)``.\n\n Classes which inherit a ``__hash__()`` method from a parent class\n but change the meaning of ``__eq__()`` such that the hash value\n returned is no longer appropriate (e.g. by switching to a value-\n based concept of equality instead of the default identity based\n equality) can explicitly flag themselves as being unhashable by\n setting ``__hash__ = None`` in the class definition. Doing so means\n that not only will instances of the class raise an appropriate\n ``TypeError`` when a program attempts to retrieve their hash value,\n but they will also be correctly identified as unhashable when\n checking ``isinstance(obj, collections.Hashable)`` (unlike classes\n which define their own ``__hash__()`` to explicitly raise\n ``TypeError``).\n\n If a class that overrides ``__eq__()`` needs to retain the\n implementation of ``__hash__()`` from a parent class, the\n interpreter must be told this explicitly by setting ``__hash__ =\n <ParentClass>.__hash__``. 
Otherwise the inheritance of\n ``__hash__()`` will be blocked, just as if ``__hash__`` had been\n explicitly set to ``None``.\n\n See also the *-R* command-line option.\n\nobject.__bool__(self)\n\n Called to implement truth value testing and the built-in operation\n ``bool()``; should return ``False`` or ``True``. When this method\n is not defined, ``__len__()`` is called, if it is defined, and the\n object is considered true if its result is nonzero. If a class\n defines neither ``__len__()`` nor ``__bool__()``, all its instances\n are considered true.\n\n\nCustomizing attribute access\n============================\n\nThe following methods can be defined to customize the meaning of\nattribute access (use of, assignment to, or deletion of ``x.name``)\nfor class instances.\n\nobject.__getattr__(self, name)\n\n Called when an attribute lookup has not found the attribute in the\n usual places (i.e. it is not an instance attribute nor is it found\n in the class tree for ``self``). ``name`` is the attribute name.\n This method should return the (computed) attribute value or raise\n an ``AttributeError`` exception.\n\n Note that if the attribute is found through the normal mechanism,\n ``__getattr__()`` is not called. (This is an intentional asymmetry\n between ``__getattr__()`` and ``__setattr__()``.) This is done both\n for efficiency reasons and because otherwise ``__getattr__()``\n would have no way to access other attributes of the instance. Note\n that at least for instance variables, you can fake total control by\n not inserting any values in the instance attribute dictionary (but\n instead inserting them in another object). See the\n ``__getattribute__()`` method below for a way to actually get total\n control over attribute access.\n\nobject.__getattribute__(self, name)\n\n Called unconditionally to implement attribute accesses for\n instances of the class. 
If the class also defines\n ``__getattr__()``, the latter will not be called unless\n ``__getattribute__()`` either calls it explicitly or raises an\n ``AttributeError``. This method should return the (computed)\n attribute value or raise an ``AttributeError`` exception. In order\n to avoid infinite recursion in this method, its implementation\n should always call the base class method with the same name to\n access any attributes it needs, for example,\n ``object.__getattribute__(self, name)``.\n\n Note: This method may still be bypassed when looking up special methods\n as the result of implicit invocation via language syntax or\n built-in functions. See *Special method lookup*.\n\nobject.__setattr__(self, name, value)\n\n Called when an attribute assignment is attempted. This is called\n instead of the normal mechanism (i.e. store the value in the\n instance dictionary). *name* is the attribute name, *value* is the\n value to be assigned to it.\n\n If ``__setattr__()`` wants to assign to an instance attribute, it\n should call the base class method with the same name, for example,\n ``object.__setattr__(self, name, value)``.\n\nobject.__delattr__(self, name)\n\n Like ``__setattr__()`` but for attribute deletion instead of\n assignment. This should only be implemented if ``del obj.name`` is\n meaningful for the object.\n\nobject.__dir__(self)\n\n Called when ``dir()`` is called on the object. A list must be\n returned.\n\n\nImplementing Descriptors\n------------------------\n\nThe following methods only apply when an instance of the class\ncontaining the method (a so-called *descriptor* class) appears in an\n*owner* class (the descriptor must be in either the owner\'s class\ndictionary or in the class dictionary for one of its parents). 
In the\nexamples below, "the attribute" refers to the attribute whose name is\nthe key of the property in the owner class\' ``__dict__``.\n\nobject.__get__(self, instance, owner)\n\n Called to get the attribute of the owner class (class attribute\n access) or of an instance of that class (instance attribute\n access). *owner* is always the owner class, while *instance* is the\n instance that the attribute was accessed through, or ``None`` when\n the attribute is accessed through the *owner*. This method should\n return the (computed) attribute value or raise an\n ``AttributeError`` exception.\n\nobject.__set__(self, instance, value)\n\n Called to set the attribute on an instance *instance* of the owner\n class to a new value, *value*.\n\nobject.__delete__(self, instance)\n\n Called to delete the attribute on an instance *instance* of the\n owner class.\n\n\nInvoking Descriptors\n--------------------\n\nIn general, a descriptor is an object attribute with "binding\nbehavior", one whose attribute access has been overridden by methods\nin the descriptor protocol: ``__get__()``, ``__set__()``, and\n``__delete__()``. If any of those methods are defined for an object,\nit is said to be a descriptor.\n\nThe default behavior for attribute access is to get, set, or delete\nthe attribute from an object\'s dictionary. For instance, ``a.x`` has a\nlookup chain starting with ``a.__dict__[\'x\']``, then\n``type(a).__dict__[\'x\']``, and continuing through the base classes of\n``type(a)`` excluding metaclasses.\n\nHowever, if the looked-up value is an object defining one of the\ndescriptor methods, then Python may override the default behavior and\ninvoke the descriptor method instead. 
Where this occurs in the\nprecedence chain depends on which descriptor methods were defined and\nhow they were called.\n\nThe starting point for descriptor invocation is a binding, ``a.x``.\nHow the arguments are assembled depends on ``a``:\n\nDirect Call\n The simplest and least common call is when user code directly\n invokes a descriptor method: ``x.__get__(a)``.\n\nInstance Binding\n If binding to an object instance, ``a.x`` is transformed into the\n call: ``type(a).__dict__[\'x\'].__get__(a, type(a))``.\n\nClass Binding\n If binding to a class, ``A.x`` is transformed into the call:\n ``A.__dict__[\'x\'].__get__(None, A)``.\n\nSuper Binding\n If ``a`` is an instance of ``super``, then the binding ``super(B,\n obj).m()`` searches ``obj.__class__.__mro__`` for the base class\n ``A`` immediately preceding ``B`` and then invokes the descriptor\n with the call: ``A.__dict__[\'m\'].__get__(obj, obj.__class__)``.\n\nFor instance bindings, the precedence of descriptor invocation depends\non which descriptor methods are defined. A descriptor can define\nany combination of ``__get__()``, ``__set__()`` and ``__delete__()``.\nIf it does not define ``__get__()``, then accessing the attribute will\nreturn the descriptor object itself unless there is a value in the\nobject\'s instance dictionary. If the descriptor defines ``__set__()``\nand/or ``__delete__()``, it is a data descriptor; if it defines\nneither, it is a non-data descriptor. Normally, data descriptors\ndefine both ``__get__()`` and ``__set__()``, while non-data\ndescriptors have just the ``__get__()`` method. Data descriptors with\n``__set__()`` and ``__get__()`` defined always override a redefinition\nin an instance dictionary. In contrast, non-data descriptors can be\noverridden by instances.\n\nPython methods (including ``staticmethod()`` and ``classmethod()``)\nare implemented as non-data descriptors. Accordingly, instances can\nredefine and override methods. 
This allows individual instances to\nacquire behaviors that differ from other instances of the same class.\n\nThe ``property()`` function is implemented as a data descriptor.\nAccordingly, instances cannot override the behavior of a property.\n\n\n__slots__\n---------\n\nBy default, instances of classes have a dictionary for attribute\nstorage. This wastes space for objects having very few instance\nvariables. The space consumption can become acute when creating large\nnumbers of instances.\n\nThe default can be overridden by defining *__slots__* in a class\ndefinition. The *__slots__* declaration takes a sequence of instance\nvariables and reserves just enough space in each instance to hold a\nvalue for each variable. Space is saved because *__dict__* is not\ncreated for each instance.\n\nobject.__slots__\n\n This class variable can be assigned a string, iterable, or sequence\n of strings with variable names used by instances. If defined in a\n class, *__slots__* reserves space for the declared variables and\n prevents the automatic creation of *__dict__* and *__weakref__* for\n each instance.\n\n\nNotes on using *__slots__*\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n* When inheriting from a class without *__slots__*, the *__dict__*\n attribute of that class will always be accessible, so a *__slots__*\n definition in the subclass is meaningless.\n\n* Without a *__dict__* variable, instances cannot be assigned new\n variables not listed in the *__slots__* definition. Attempts to\n assign to an unlisted variable name raises ``AttributeError``. If\n dynamic assignment of new variables is desired, then add\n ``\'__dict__\'`` to the sequence of strings in the *__slots__*\n declaration.\n\n* Without a *__weakref__* variable for each instance, classes defining\n *__slots__* do not support weak references to its instances. 
If weak\n reference support is needed, then add ``\'__weakref__\'`` to the\n sequence of strings in the *__slots__* declaration.\n\n* *__slots__* are implemented at the class level by creating\n descriptors (*Implementing Descriptors*) for each variable name. As\n a result, class attributes cannot be used to set default values for\n instance variables defined by *__slots__*; otherwise, the class\n attribute would overwrite the descriptor assignment.\n\n* The action of a *__slots__* declaration is limited to the class\n where it is defined. As a result, subclasses will have a *__dict__*\n unless they also define *__slots__* (which must only contain names\n of any *additional* slots).\n\n* If a class defines a slot also defined in a base class, the instance\n variable defined by the base class slot is inaccessible (except by\n retrieving its descriptor directly from the base class). This\n renders the meaning of the program undefined. In the future, a\n check may be added to prevent this.\n\n* Nonempty *__slots__* does not work for classes derived from\n "variable-length" built-in types such as ``int``, ``str`` and\n ``tuple``.\n\n* Any non-string iterable may be assigned to *__slots__*. Mappings may\n also be used; however, in the future, special meaning may be\n assigned to the values corresponding to each key.\n\n* *__class__* assignment works only if both classes have the same\n *__slots__*.\n\n\nCustomizing class creation\n==========================\n\nBy default, classes are constructed using ``type()``. A class\ndefinition is read into a separate namespace and the value of class\nname is bound to the result of ``type(name, bases, dict)``.\n\nWhen the class definition is read, if a callable ``metaclass`` keyword\nargument is passed after the bases in the class definition, the\ncallable given will be called instead of ``type()``. If other keyword\narguments are passed, they will also be passed to the metaclass. 
This\nallows classes or functions to be written which monitor or alter the\nclass creation process:\n\n* Modifying the class dictionary prior to the class being created.\n\n* Returning an instance of another class -- essentially performing the\n role of a factory function.\n\nThese steps will have to be performed in the metaclass\'s ``__new__()``\nmethod -- ``type.__new__()`` can then be called from this method to\ncreate a class with different properties. This example adds a new\nelement to the class dictionary before creating the class:\n\n class metacls(type):\n def __new__(mcs, name, bases, dict):\n dict[\'foo\'] = \'metacls was here\'\n return type.__new__(mcs, name, bases, dict)\n\nYou can of course also override other class methods (or add new\nmethods); for example defining a custom ``__call__()`` method in the\nmetaclass allows custom behavior when the class is called, e.g. not\nalways creating a new instance.\n\nIf the metaclass has a ``__prepare__()`` attribute (usually\nimplemented as a class or static method), it is called before the\nclass body is evaluated with the name of the class and a tuple of its\nbases for arguments. It should return an object that supports the\nmapping interface that will be used to store the namespace of the\nclass. The default is a plain dictionary. This could be used, for\nexample, to keep track of the order that class attributes are declared\nin by returning an ordered dictionary.\n\nThe appropriate metaclass is determined by the following precedence\nrules:\n\n* If the ``metaclass`` keyword argument is passed with the bases, it\n is used.\n\n* Otherwise, if there is at least one base class, its metaclass is\n used.\n\n* Otherwise, the default metaclass (``type``) is used.\n\nThe potential uses for metaclasses are boundless. 
Some ideas that have\nbeen explored include logging, interface checking, automatic\ndelegation, automatic property creation, proxies, frameworks, and\nautomatic resource locking/synchronization.\n\nHere is an example of a metaclass that uses a\n``collections.OrderedDict`` to remember the order that class members\nwere defined:\n\n class OrderedClass(type):\n\n @classmethod\n def __prepare__(metacls, name, bases, **kwds):\n return collections.OrderedDict()\n\n def __new__(cls, name, bases, classdict):\n result = type.__new__(cls, name, bases, dict(classdict))\n result.members = tuple(classdict)\n return result\n\n class A(metaclass=OrderedClass):\n def one(self): pass\n def two(self): pass\n def three(self): pass\n def four(self): pass\n\n >>> A.members\n (\'__module__\', \'one\', \'two\', \'three\', \'four\')\n\nWhen the class definition for *A* gets executed, the process begins\nwith calling the metaclass\'s ``__prepare__()`` method which returns an\nempty ``collections.OrderedDict``. That mapping records the methods\nand attributes of *A* as they are defined within the body of the class\nstatement. Once those definitions are executed, the ordered dictionary\nis fully populated and the metaclass\'s ``__new__()`` method gets\ninvoked. That method builds the new type and it saves the ordered\ndictionary keys in an attribute called ``members``.\n\n\nCustomizing instance and subclass checks\n========================================\n\nThe following methods are used to override the default behavior of the\n``isinstance()`` and ``issubclass()`` built-in functions.\n\nIn particular, the metaclass ``abc.ABCMeta`` implements these methods\nin order to allow the addition of Abstract Base Classes (ABCs) as\n"virtual base classes" to any class or type (including built-in\ntypes), including other ABCs.\n\nclass.__instancecheck__(self, instance)\n\n Return true if *instance* should be considered a (direct or\n indirect) instance of *class*. 
If defined, called to implement\n ``isinstance(instance, class)``.\n\nclass.__subclasscheck__(self, subclass)\n\n Return true if *subclass* should be considered a (direct or\n indirect) subclass of *class*. If defined, called to implement\n ``issubclass(subclass, class)``.\n\nNote that these methods are looked up on the type (metaclass) of a\nclass. They cannot be defined as class methods in the actual class.\nThis is consistent with the lookup of special methods that are called\non instances, only in this case the instance is itself a class.\n\nSee also:\n\n **PEP 3119** - Introducing Abstract Base Classes\n Includes the specification for customizing ``isinstance()`` and\n ``issubclass()`` behavior through ``__instancecheck__()`` and\n ``__subclasscheck__()``, with motivation for this functionality\n in the context of adding Abstract Base Classes (see the ``abc``\n module) to the language.\n\n\nEmulating callable objects\n==========================\n\nobject.__call__(self[, args...])\n\n Called when the instance is "called" as a function; if this method\n is defined, ``x(arg1, arg2, ...)`` is a shorthand for\n ``x.__call__(arg1, arg2, ...)``.\n\n\nEmulating container types\n=========================\n\nThe following methods can be defined to implement container objects.\nContainers usually are sequences (such as lists or tuples) or mappings\n(like dictionaries), but can represent other containers as well. The\nfirst set of methods is used either to emulate a sequence or to\nemulate a mapping; the difference is that for a sequence, the\nallowable keys should be the integers *k* for which ``0 <= k < N``\nwhere *N* is the length of the sequence, or slice objects, which\ndefine a range of items. It is also recommended that mappings provide\nthe methods ``keys()``, ``values()``, ``items()``, ``get()``,\n``clear()``, ``setdefault()``, ``pop()``, ``popitem()``, ``copy()``,\nand ``update()`` behaving similar to those for Python\'s standard\ndictionary objects. 
The ``collections`` module provides a\n``MutableMapping`` abstract base class to help create those methods\nfrom a base set of ``__getitem__()``, ``__setitem__()``,\n``__delitem__()``, and ``keys()``. Mutable sequences should provide\nmethods ``append()``, ``count()``, ``index()``, ``extend()``,\n``insert()``, ``pop()``, ``remove()``, ``reverse()`` and ``sort()``,\nlike Python standard list objects. Finally, sequence types should\nimplement addition (meaning concatenation) and multiplication (meaning\nrepetition) by defining the methods ``__add__()``, ``__radd__()``,\n``__iadd__()``, ``__mul__()``, ``__rmul__()`` and ``__imul__()``\ndescribed below; they should not define other numerical operators. It\nis recommended that both mappings and sequences implement the\n``__contains__()`` method to allow efficient use of the ``in``\noperator; for mappings, ``in`` should search the mapping\'s keys; for\nsequences, it should search through the values. It is further\nrecommended that both mappings and sequences implement the\n``__iter__()`` method to allow efficient iteration through the\ncontainer; for mappings, ``__iter__()`` should be the same as\n``keys()``; for sequences, it should iterate through the values.\n\nobject.__len__(self)\n\n Called to implement the built-in function ``len()``. Should return\n the length of the object, an integer ``>=`` 0. Also, an object\n that doesn\'t define a ``__bool__()`` method and whose ``__len__()``\n method returns zero is considered to be false in a Boolean context.\n\nNote: Slicing is done exclusively with the following three methods. A\n call like\n\n a[1:2] = b\n\n is translated to\n\n a[slice(1, 2, None)] = b\n\n and so forth. Missing slice items are always filled in with\n ``None``.\n\nobject.__getitem__(self, key)\n\n Called to implement evaluation of ``self[key]``. 
For sequence\n types, the accepted keys should be integers and slice objects.\n Note that the special interpretation of negative indexes (if the\n class wishes to emulate a sequence type) is up to the\n ``__getitem__()`` method. If *key* is of an inappropriate type,\n ``TypeError`` may be raised; if of a value outside the set of\n indexes for the sequence (after any special interpretation of\n negative values), ``IndexError`` should be raised. For mapping\n types, if *key* is missing (not in the container), ``KeyError``\n should be raised.\n\n Note: ``for`` loops expect that an ``IndexError`` will be raised for\n illegal indexes to allow proper detection of the end of the\n sequence.\n\nobject.__setitem__(self, key, value)\n\n Called to implement assignment to ``self[key]``. Same note as for\n ``__getitem__()``. This should only be implemented for mappings if\n the objects support changes to the values for keys, or if new keys\n can be added, or for sequences if elements can be replaced. The\n same exceptions should be raised for improper *key* values as for\n the ``__getitem__()`` method.\n\nobject.__delitem__(self, key)\n\n Called to implement deletion of ``self[key]``. Same note as for\n ``__getitem__()``. This should only be implemented for mappings if\n the objects support removal of keys, or for sequences if elements\n can be removed from the sequence. The same exceptions should be\n raised for improper *key* values as for the ``__getitem__()``\n method.\n\nobject.__iter__(self)\n\n This method is called when an iterator is required for a container.\n This method should return a new iterator object that can iterate\n over all the objects in the container. For mappings, it should\n iterate over the keys of the container, and should also be made\n available as the method ``keys()``.\n\n Iterator objects also need to implement this method; they are\n required to return themselves. 
For more information on iterator\n objects, see *Iterator Types*.\n\nobject.__reversed__(self)\n\n Called (if present) by the ``reversed()`` built-in to implement\n reverse iteration. It should return a new iterator object that\n iterates over all the objects in the container in reverse order.\n\n If the ``__reversed__()`` method is not provided, the\n ``reversed()`` built-in will fall back to using the sequence\n protocol (``__len__()`` and ``__getitem__()``). Objects that\n support the sequence protocol should only provide\n ``__reversed__()`` if they can provide an implementation that is\n more efficient than the one provided by ``reversed()``.\n\nThe membership test operators (``in`` and ``not in``) are normally\nimplemented as an iteration through a sequence. However, container\nobjects can supply the following special method with a more efficient\nimplementation, which also does not require the object be a sequence.\n\nobject.__contains__(self, item)\n\n Called to implement membership test operators. Should return true\n if *item* is in *self*, false otherwise. 
For mapping objects, this\n should consider the keys of the mapping rather than the values or\n the key-item pairs.\n\n For objects that don\'t define ``__contains__()``, the membership\n test first tries iteration via ``__iter__()``, then the old\n sequence iteration protocol via ``__getitem__()``, see *this\n section in the language reference*.\n\n\nEmulating numeric types\n=======================\n\nThe following methods can be defined to emulate numeric objects.\nMethods corresponding to operations that are not supported by the\nparticular kind of number implemented (e.g., bitwise operations for\nnon-integral numbers) should be left undefined.\n\nobject.__add__(self, other)\nobject.__sub__(self, other)\nobject.__mul__(self, other)\nobject.__truediv__(self, other)\nobject.__floordiv__(self, other)\nobject.__mod__(self, other)\nobject.__divmod__(self, other)\nobject.__pow__(self, other[, modulo])\nobject.__lshift__(self, other)\nobject.__rshift__(self, other)\nobject.__and__(self, other)\nobject.__xor__(self, other)\nobject.__or__(self, other)\n\n These methods are called to implement the binary arithmetic\n operations (``+``, ``-``, ``*``, ``/``, ``//``, ``%``,\n ``divmod()``, ``pow()``, ``**``, ``<<``, ``>>``, ``&``, ``^``,\n ``|``). For instance, to evaluate the expression ``x + y``, where\n *x* is an instance of a class that has an ``__add__()`` method,\n ``x.__add__(y)`` is called. The ``__divmod__()`` method should be\n the equivalent to using ``__floordiv__()`` and ``__mod__()``; it\n should not be related to ``__truediv__()``. 
Note that\n ``__pow__()`` should be defined to accept an optional third\n argument if the ternary version of the built-in ``pow()`` function\n is to be supported.\n\n If one of those methods does not support the operation with the\n supplied arguments, it should return ``NotImplemented``.\n\nobject.__radd__(self, other)\nobject.__rsub__(self, other)\nobject.__rmul__(self, other)\nobject.__rtruediv__(self, other)\nobject.__rfloordiv__(self, other)\nobject.__rmod__(self, other)\nobject.__rdivmod__(self, other)\nobject.__rpow__(self, other)\nobject.__rlshift__(self, other)\nobject.__rrshift__(self, other)\nobject.__rand__(self, other)\nobject.__rxor__(self, other)\nobject.__ror__(self, other)\n\n These methods are called to implement the binary arithmetic\n operations (``+``, ``-``, ``*``, ``/``, ``//``, ``%``,\n ``divmod()``, ``pow()``, ``**``, ``<<``, ``>>``, ``&``, ``^``,\n ``|``) with reflected (swapped) operands. These functions are only\n called if the left operand does not support the corresponding\n operation and the operands are of different types. [2] For\n instance, to evaluate the expression ``x - y``, where *y* is an\n instance of a class that has an ``__rsub__()`` method,\n ``y.__rsub__(x)`` is called if ``x.__sub__(y)`` returns\n *NotImplemented*.\n\n Note that ternary ``pow()`` will not try calling ``__rpow__()``\n (the coercion rules would become too complicated).\n\n Note: If the right operand\'s type is a subclass of the left operand\'s\n type and that subclass provides the reflected method for the\n operation, this method will be called before the left operand\'s\n non-reflected method. 
This behavior allows subclasses to\n override their ancestors\' operations.\n\nobject.__iadd__(self, other)\nobject.__isub__(self, other)\nobject.__imul__(self, other)\nobject.__itruediv__(self, other)\nobject.__ifloordiv__(self, other)\nobject.__imod__(self, other)\nobject.__ipow__(self, other[, modulo])\nobject.__ilshift__(self, other)\nobject.__irshift__(self, other)\nobject.__iand__(self, other)\nobject.__ixor__(self, other)\nobject.__ior__(self, other)\n\n These methods are called to implement the augmented arithmetic\n assignments (``+=``, ``-=``, ``*=``, ``/=``, ``//=``, ``%=``,\n ``**=``, ``<<=``, ``>>=``, ``&=``, ``^=``, ``|=``). These methods\n should attempt to do the operation in-place (modifying *self*) and\n return the result (which could be, but does not have to be,\n *self*). If a specific method is not defined, the augmented\n assignment falls back to the normal methods. For instance, to\n execute the statement ``x += y``, where *x* is an instance of a\n class that has an ``__iadd__()`` method, ``x.__iadd__(y)`` is\n called. If *x* is an instance of a class that does not define a\n ``__iadd__()`` method, ``x.__add__(y)`` and ``y.__radd__(x)`` are\n considered, as with the evaluation of ``x + y``.\n\nobject.__neg__(self)\nobject.__pos__(self)\nobject.__abs__(self)\nobject.__invert__(self)\n\n Called to implement the unary arithmetic operations (``-``, ``+``,\n ``abs()`` and ``~``).\n\nobject.__complex__(self)\nobject.__int__(self)\nobject.__float__(self)\nobject.__round__(self[, n])\n\n Called to implement the built-in functions ``complex()``,\n ``int()``, ``float()`` and ``round()``. Should return a value of\n the appropriate type.\n\nobject.__index__(self)\n\n Called to implement ``operator.index()``. Also called whenever\n Python needs an integer object (such as in slicing, or in the\n built-in ``bin()``, ``hex()`` and ``oct()`` functions). 
Must return\n an integer.\n\n\nWith Statement Context Managers\n===============================\n\nA *context manager* is an object that defines the runtime context to\nbe established when executing a ``with`` statement. The context\nmanager handles the entry into, and the exit from, the desired runtime\ncontext for the execution of the block of code. Context managers are\nnormally invoked using the ``with`` statement (described in section\n*The with statement*), but can also be used by directly invoking their\nmethods.\n\nTypical uses of context managers include saving and restoring various\nkinds of global state, locking and unlocking resources, closing opened\nfiles, etc.\n\nFor more information on context managers, see *Context Manager Types*.\n\nobject.__enter__(self)\n\n Enter the runtime context related to this object. The ``with``\n statement will bind this method\'s return value to the target(s)\n specified in the ``as`` clause of the statement, if any.\n\nobject.__exit__(self, exc_type, exc_value, traceback)\n\n Exit the runtime context related to this object. The parameters\n describe the exception that caused the context to be exited. If the\n context was exited without an exception, all three arguments will\n be ``None``.\n\n If an exception is supplied, and the method wishes to suppress the\n exception (i.e., prevent it from being propagated), it should\n return a true value. Otherwise, the exception will be processed\n normally upon exit from this method.\n\n Note that ``__exit__()`` methods should not reraise the passed-in\n exception; this is the caller\'s responsibility.\n\nSee also:\n\n **PEP 0343** - The "with" statement\n The specification, background, and examples for the Python\n ``with`` statement.\n\n\nSpecial method lookup\n=====================\n\nFor custom classes, implicit invocations of special methods are only\nguaranteed to work correctly if defined on an object\'s type, not in\nthe object\'s instance dictionary. 
That behaviour is the reason why\nthe following code raises an exception:\n\n >>> class C:\n ... pass\n ...\n >>> c = C()\n >>> c.__len__ = lambda: 5\n >>> len(c)\n Traceback (most recent call last):\n File "<stdin>", line 1, in <module>\n TypeError: object of type \'C\' has no len()\n\nThe rationale behind this behaviour lies with a number of special\nmethods such as ``__hash__()`` and ``__repr__()`` that are implemented\nby all objects, including type objects. If the implicit lookup of\nthese methods used the conventional lookup process, they would fail\nwhen invoked on the type object itself:\n\n >>> 1 .__hash__() == hash(1)\n True\n >>> int.__hash__() == hash(int)\n Traceback (most recent call last):\n File "<stdin>", line 1, in <module>\n TypeError: descriptor \'__hash__\' of \'int\' object needs an argument\n\nIncorrectly attempting to invoke an unbound method of a class in this\nway is sometimes referred to as \'metaclass confusion\', and is avoided\nby bypassing the instance when looking up special methods:\n\n >>> type(1).__hash__(1) == hash(1)\n True\n >>> type(int).__hash__(int) == hash(int)\n True\n\nIn addition to bypassing any instance attributes in the interest of\ncorrectness, implicit special method lookup generally also bypasses\nthe ``__getattribute__()`` method even of the object\'s metaclass:\n\n >>> class Meta(type):\n ... def __getattribute__(*args):\n ... print("Metaclass getattribute invoked")\n ... return type.__getattribute__(*args)\n ...\n >>> class C(object, metaclass=Meta):\n ... def __len__(self):\n ... return 10\n ... def __getattribute__(*args):\n ... print("Class getattribute invoked")\n ... 
return object.__getattribute__(*args)\n ...\n >>> c = C()\n >>> c.__len__() # Explicit lookup via instance\n Class getattribute invoked\n 10\n >>> type(c).__len__(c) # Explicit lookup via type\n Metaclass getattribute invoked\n 10\n >>> len(c) # Implicit lookup\n 10\n\nBypassing the ``__getattribute__()`` machinery in this fashion\nprovides significant scope for speed optimisations within the\ninterpreter, at the cost of some flexibility in the handling of\nspecial methods (the special method *must* be set on the class object\nitself in order to be consistently invoked by the interpreter).\n\n-[ Footnotes ]-\n\n[1] It *is* possible in some cases to change an object\'s type, under\n certain controlled conditions. It generally isn\'t a good idea\n though, since it can lead to some very strange behaviour if it is\n handled incorrectly.\n\n[2] For operands of the same type, it is assumed that if the non-\n reflected method (such as ``__add__()``) fails the operation is\n not supported, which is why the reflected method is not called.\n',
- 'string-methods': '\nString Methods\n**************\n\nString objects support the methods listed below.\n\nIn addition, Python\'s strings support the sequence type methods\ndescribed in the *Sequence Types --- str, bytes, bytearray, list,\ntuple, range* section. To output formatted strings, see the *String\nFormatting* section. Also, see the ``re`` module for string functions\nbased on regular expressions.\n\nstr.capitalize()\n\n Return a copy of the string with its first character capitalized\n and the rest lowercased.\n\nstr.center(width[, fillchar])\n\n Return centered in a string of length *width*. Padding is done\n using the specified *fillchar* (default is a space).\n\nstr.count(sub[, start[, end]])\n\n Return the number of non-overlapping occurrences of substring *sub*\n in the range [*start*, *end*]. Optional arguments *start* and\n *end* are interpreted as in slice notation.\n\nstr.encode(encoding="utf-8", errors="strict")\n\n Return an encoded version of the string as a bytes object. Default\n encoding is ``\'utf-8\'``. *errors* may be given to set a different\n error handling scheme. The default for *errors* is ``\'strict\'``,\n meaning that encoding errors raise a ``UnicodeError``. Other\n possible values are ``\'ignore\'``, ``\'replace\'``,\n ``\'xmlcharrefreplace\'``, ``\'backslashreplace\'`` and any other name\n registered via ``codecs.register_error()``, see section *Codec Base\n Classes*. For a list of possible encodings, see section *Standard\n Encodings*.\n\n Changed in version 3.1: Support for keyword arguments added.\n\nstr.endswith(suffix[, start[, end]])\n\n Return ``True`` if the string ends with the specified *suffix*,\n otherwise return ``False``. *suffix* can also be a tuple of\n suffixes to look for. With optional *start*, test beginning at\n that position. 
With optional *end*, stop comparing at that\n position.\n\nstr.expandtabs([tabsize])\n\n Return a copy of the string where all tab characters are replaced\n by zero or more spaces, depending on the current column and the\n given tab size. The column number is reset to zero after each\n newline occurring in the string. If *tabsize* is not given, a tab\n size of ``8`` characters is assumed. This doesn\'t understand other\n non-printing characters or escape sequences.\n\nstr.find(sub[, start[, end]])\n\n Return the lowest index in the string where substring *sub* is\n found, such that *sub* is contained in the slice ``s[start:end]``.\n Optional arguments *start* and *end* are interpreted as in slice\n notation. Return ``-1`` if *sub* is not found.\n\n Note: The ``find()`` method should be used only if you need to know the\n position of *sub*. To check if *sub* is a substring or not, use\n the ``in`` operator:\n\n >>> \'Py\' in \'Python\'\n True\n\nstr.format(*args, **kwargs)\n\n Perform a string formatting operation. The string on which this\n method is called can contain literal text or replacement fields\n delimited by braces ``{}``. Each replacement field contains either\n the numeric index of a positional argument, or the name of a\n keyword argument. Returns a copy of the string where each\n replacement field is replaced with the string value of the\n corresponding argument.\n\n >>> "The sum of 1 + 2 is {0}".format(1+2)\n \'The sum of 1 + 2 is 3\'\n\n See *Format String Syntax* for a description of the various\n formatting options that can be specified in format strings.\n\nstr.format_map(mapping)\n\n Similar to ``str.format(**mapping)``, except that ``mapping`` is\n used directly and not copied to a ``dict``. This is useful if, for\n example, ``mapping`` is a dict subclass:\n\n >>> class Default(dict):\n ... def __missing__(self, key):\n ... 
return key\n ...\n >>> \'{name} was born in {country}\'.format_map(Default(name=\'Guido\'))\n \'Guido was born in country\'\n\n New in version 3.2.\n\nstr.index(sub[, start[, end]])\n\n Like ``find()``, but raise ``ValueError`` when the substring is not\n found.\n\nstr.isalnum()\n\n Return true if all characters in the string are alphanumeric and\n there is at least one character, false otherwise. A character\n ``c`` is alphanumeric if one of the following returns ``True``:\n ``c.isalpha()``, ``c.isdecimal()``, ``c.isdigit()``, or\n ``c.isnumeric()``.\n\nstr.isalpha()\n\n Return true if all characters in the string are alphabetic and\n there is at least one character, false otherwise. Alphabetic\n characters are those characters defined in the Unicode character\n database as "Letter", i.e., those with general category property\n being one of "Lm", "Lt", "Lu", "Ll", or "Lo". Note that this is\n different from the "Alphabetic" property defined in the Unicode\n Standard.\n\nstr.isdecimal()\n\n Return true if all characters in the string are decimal characters\n and there is at least one character, false otherwise. Decimal\n characters are those from general category "Nd". This category\n includes digit characters, and all characters that can be used to\n form decimal-radix numbers, e.g. U+0660, ARABIC-INDIC DIGIT ZERO.\n\nstr.isdigit()\n\n Return true if all characters in the string are digits and there is\n at least one character, false otherwise. Digits include decimal\n characters and digits that need special handling, such as the\n compatibility superscript digits. 
Formally, a digit is a character\n that has the property value Numeric_Type=Digit or\n Numeric_Type=Decimal.\n\nstr.isidentifier()\n\n Return true if the string is a valid identifier according to the\n language definition, section *Identifiers and keywords*.\n\nstr.islower()\n\n Return true if all cased characters [4] in the string are lowercase\n and there is at least one cased character, false otherwise.\n\nstr.isnumeric()\n\n Return true if all characters in the string are numeric characters,\n and there is at least one character, false otherwise. Numeric\n characters include digit characters, and all characters that have\n the Unicode numeric value property, e.g. U+2155, VULGAR FRACTION\n ONE FIFTH. Formally, numeric characters are those with the\n property value Numeric_Type=Digit, Numeric_Type=Decimal or\n Numeric_Type=Numeric.\n\nstr.isprintable()\n\n Return true if all characters in the string are printable or the\n string is empty, false otherwise. Nonprintable characters are\n those characters defined in the Unicode character database as\n "Other" or "Separator", excepting the ASCII space (0x20) which is\n considered printable. (Note that printable characters in this\n context are those which should not be escaped when ``repr()`` is\n invoked on a string. It has no bearing on the handling of strings\n written to ``sys.stdout`` or ``sys.stderr``.)\n\nstr.isspace()\n\n Return true if there are only whitespace characters in the string\n and there is at least one character, false otherwise. 
Whitespace\n characters are those characters defined in the Unicode character\n database as "Other" or "Separator" and those with bidirectional\n property being one of "WS", "B", or "S".\n\nstr.istitle()\n\n Return true if the string is a titlecased string and there is at\n least one character, for example uppercase characters may only\n follow uncased characters and lowercase characters only cased ones.\n Return false otherwise.\n\nstr.isupper()\n\n Return true if all cased characters [4] in the string are uppercase\n and there is at least one cased character, false otherwise.\n\nstr.join(iterable)\n\n Return a string which is the concatenation of the strings in the\n iterable *iterable*. A ``TypeError`` will be raised if there are\n any non-string values in *iterable*, including ``bytes`` objects.\n The separator between elements is the string providing this method.\n\nstr.ljust(width[, fillchar])\n\n Return the string left justified in a string of length *width*.\n Padding is done using the specified *fillchar* (default is a\n space). The original string is returned if *width* is less than or\n equal to ``len(s)``.\n\nstr.lower()\n\n Return a copy of the string with all the cased characters [4]\n converted to lowercase.\n\nstr.lstrip([chars])\n\n Return a copy of the string with leading characters removed. The\n *chars* argument is a string specifying the set of characters to be\n removed. If omitted or ``None``, the *chars* argument defaults to\n removing whitespace. 
The *chars* argument is not a prefix; rather,\n all combinations of its values are stripped:\n\n >>> \' spacious \'.lstrip()\n \'spacious \'\n >>> \'www.example.com\'.lstrip(\'cmowz.\')\n \'example.com\'\n\nstatic str.maketrans(x[, y[, z]])\n\n This static method returns a translation table usable for\n ``str.translate()``.\n\n If there is only one argument, it must be a dictionary mapping\n Unicode ordinals (integers) or characters (strings of length 1) to\n Unicode ordinals, strings (of arbitrary lengths) or None.\n Character keys will then be converted to ordinals.\n\n If there are two arguments, they must be strings of equal length,\n and in the resulting dictionary, each character in x will be mapped\n to the character at the same position in y. If there is a third\n argument, it must be a string, whose characters will be mapped to\n None in the result.\n\nstr.partition(sep)\n\n Split the string at the first occurrence of *sep*, and return a\n 3-tuple containing the part before the separator, the separator\n itself, and the part after the separator. If the separator is not\n found, return a 3-tuple containing the string itself, followed by\n two empty strings.\n\nstr.replace(old, new[, count])\n\n Return a copy of the string with all occurrences of substring *old*\n replaced by *new*. If the optional argument *count* is given, only\n the first *count* occurrences are replaced.\n\nstr.rfind(sub[, start[, end]])\n\n Return the highest index in the string where substring *sub* is\n found, such that *sub* is contained within ``s[start:end]``.\n Optional arguments *start* and *end* are interpreted as in slice\n notation. Return ``-1`` on failure.\n\nstr.rindex(sub[, start[, end]])\n\n Like ``rfind()`` but raises ``ValueError`` when the substring *sub*\n is not found.\n\nstr.rjust(width[, fillchar])\n\n Return the string right justified in a string of length *width*.\n Padding is done using the specified *fillchar* (default is a\n space). 
The original string is returned if *width* is less than or\n equal to ``len(s)``.\n\nstr.rpartition(sep)\n\n Split the string at the last occurrence of *sep*, and return a\n 3-tuple containing the part before the separator, the separator\n itself, and the part after the separator. If the separator is not\n found, return a 3-tuple containing two empty strings, followed by\n the string itself.\n\nstr.rsplit([sep[, maxsplit]])\n\n Return a list of the words in the string, using *sep* as the\n delimiter string. If *maxsplit* is given, at most *maxsplit* splits\n are done, the *rightmost* ones. If *sep* is not specified or\n ``None``, any whitespace string is a separator. Except for\n splitting from the right, ``rsplit()`` behaves like ``split()``\n which is described in detail below.\n\nstr.rstrip([chars])\n\n Return a copy of the string with trailing characters removed. The\n *chars* argument is a string specifying the set of characters to be\n removed. If omitted or ``None``, the *chars* argument defaults to\n removing whitespace. The *chars* argument is not a suffix; rather,\n all combinations of its values are stripped:\n\n >>> \' spacious \'.rstrip()\n \' spacious\'\n >>> \'mississippi\'.rstrip(\'ipz\')\n \'mississ\'\n\nstr.split([sep[, maxsplit]])\n\n Return a list of the words in the string, using *sep* as the\n delimiter string. If *maxsplit* is given, at most *maxsplit*\n splits are done (thus, the list will have at most ``maxsplit+1``\n elements). If *maxsplit* is not specified, then there is no limit\n on the number of splits (all possible splits are made).\n\n If *sep* is given, consecutive delimiters are not grouped together\n and are deemed to delimit empty strings (for example,\n ``\'1,,2\'.split(\',\')`` returns ``[\'1\', \'\', \'2\']``). The *sep*\n argument may consist of multiple characters (for example,\n ``\'1<>2<>3\'.split(\'<>\')`` returns ``[\'1\', \'2\', \'3\']``). 
Splitting\n an empty string with a specified separator returns ``[\'\']``.\n\n If *sep* is not specified or is ``None``, a different splitting\n algorithm is applied: runs of consecutive whitespace are regarded\n as a single separator, and the result will contain no empty strings\n at the start or end if the string has leading or trailing\n whitespace. Consequently, splitting an empty string or a string\n consisting of just whitespace with a ``None`` separator returns\n ``[]``.\n\n For example, ``\' 1 2 3 \'.split()`` returns ``[\'1\', \'2\', \'3\']``,\n and ``\' 1 2 3 \'.split(None, 1)`` returns ``[\'1\', \'2 3 \']``.\n\nstr.splitlines([keepends])\n\n Return a list of the lines in the string, breaking at line\n boundaries. Line breaks are not included in the resulting list\n unless *keepends* is given and true.\n\nstr.startswith(prefix[, start[, end]])\n\n Return ``True`` if string starts with the *prefix*, otherwise\n return ``False``. *prefix* can also be a tuple of prefixes to look\n for. With optional *start*, test string beginning at that\n position. With optional *end*, stop comparing string at that\n position.\n\nstr.strip([chars])\n\n Return a copy of the string with the leading and trailing\n characters removed. The *chars* argument is a string specifying the\n set of characters to be removed. If omitted or ``None``, the\n *chars* argument defaults to removing whitespace. The *chars*\n argument is not a prefix or suffix; rather, all combinations of its\n values are stripped:\n\n >>> \' spacious \'.strip()\n \'spacious\'\n >>> \'www.example.com\'.strip(\'cmowz.\')\n \'example\'\n\nstr.swapcase()\n\n Return a copy of the string with uppercase characters converted to\n lowercase and vice versa.\n\nstr.title()\n\n Return a titlecased version of the string where words start with an\n uppercase character and the remaining characters are lowercase.\n\n The algorithm uses a simple language-independent definition of a\n word as groups of consecutive letters. 
The definition works in\n many contexts but it means that apostrophes in contractions and\n possessives form word boundaries, which may not be the desired\n result:\n\n >>> "they\'re bill\'s friends from the UK".title()\n "They\'Re Bill\'S Friends From The Uk"\n\n A workaround for apostrophes can be constructed using regular\n expressions:\n\n >>> import re\n >>> def titlecase(s):\n return re.sub(r"[A-Za-z]+(\'[A-Za-z]+)?",\n lambda mo: mo.group(0)[0].upper() +\n mo.group(0)[1:].lower(),\n s)\n\n >>> titlecase("they\'re bill\'s friends.")\n "They\'re Bill\'s Friends."\n\nstr.translate(map)\n\n Return a copy of the string where all characters have been mapped\n through the *map*, which must be a dictionary of Unicode ordinals\n (integers) to Unicode ordinals, strings or ``None``. Unmapped\n characters are left untouched. Characters mapped to ``None`` are\n deleted.\n\n You can use ``str.maketrans()`` to create a translation map from\n character-to-character mappings in different formats.\n\n Note: An even more flexible approach is to create a custom character\n mapping codec using the ``codecs`` module (see\n ``encodings.cp1251`` for an example).\n\nstr.upper()\n\n Return a copy of the string with all the cased characters [4]\n converted to uppercase. Note that ``s.upper().isupper()`` might\n be ``False`` if ``s`` contains uncased characters or if the Unicode\n category of the resulting character(s) is not "Lu" (Letter,\n uppercase), but e.g. "Lt" (Letter, titlecase).\n\nstr.zfill(width)\n\n Return the numeric string left filled with zeros in a string of\n length *width*. A sign prefix is handled correctly. The original\n string is returned if *width* is less than or equal to ``len(s)``.\n',
- 'strings': '\nString and Bytes literals\n*************************\n\nString literals are described by the following lexical definitions:\n\n stringliteral ::= [stringprefix](shortstring | longstring)\n stringprefix ::= "r" | "R"\n shortstring ::= "\'" shortstringitem* "\'" | \'"\' shortstringitem* \'"\'\n longstring ::= "\'\'\'" longstringitem* "\'\'\'" | \'"""\' longstringitem* \'"""\'\n shortstringitem ::= shortstringchar | stringescapeseq\n longstringitem ::= longstringchar | stringescapeseq\n shortstringchar ::= <any source character except "\\" or newline or the quote>\n longstringchar ::= <any source character except "\\">\n stringescapeseq ::= "\\" <any source character>\n\n bytesliteral ::= bytesprefix(shortbytes | longbytes)\n bytesprefix ::= "b" | "B" | "br" | "Br" | "bR" | "BR"\n shortbytes ::= "\'" shortbytesitem* "\'" | \'"\' shortbytesitem* \'"\'\n longbytes ::= "\'\'\'" longbytesitem* "\'\'\'" | \'"""\' longbytesitem* \'"""\'\n shortbytesitem ::= shortbyteschar | bytesescapeseq\n longbytesitem ::= longbyteschar | bytesescapeseq\n shortbyteschar ::= <any ASCII character except "\\" or newline or the quote>\n longbyteschar ::= <any ASCII character except "\\">\n bytesescapeseq ::= "\\" <any ASCII character>\n\nOne syntactic restriction not indicated by these productions is that\nwhitespace is not allowed between the ``stringprefix`` or\n``bytesprefix`` and the rest of the literal. The source character set\nis defined by the encoding declaration; it is UTF-8 if no encoding\ndeclaration is given in the source file; see section *Encoding\ndeclarations*.\n\nIn plain English: Both types of literals can be enclosed in matching\nsingle quotes (``\'``) or double quotes (``"``). They can also be\nenclosed in matching groups of three single or double quotes (these\nare generally referred to as *triple-quoted strings*). 
The backslash\n(``\\``) character is used to escape characters that otherwise have a\nspecial meaning, such as newline, backslash itself, or the quote\ncharacter.\n\nBytes literals are always prefixed with ``\'b\'`` or ``\'B\'``; they\nproduce an instance of the ``bytes`` type instead of the ``str`` type.\nThey may only contain ASCII characters; bytes with a numeric value of\n128 or greater must be expressed with escapes.\n\nBoth string and bytes literals may optionally be prefixed with a\nletter ``\'r\'`` or ``\'R\'``; such strings are called *raw strings* and\ntreat backslashes as literal characters. As a result, in string\nliterals, ``\'\\U\'`` and ``\'\\u\'`` escapes in raw strings are not treated\nspecially.\n\nIn triple-quoted strings, unescaped newlines and quotes are allowed\n(and are retained), except that three unescaped quotes in a row\nterminate the string. (A "quote" is the character used to open the\nstring, i.e. either ``\'`` or ``"``.)\n\nUnless an ``\'r\'`` or ``\'R\'`` prefix is present, escape sequences in\nstrings are interpreted according to rules similar to those used by\nStandard C. 
The recognized escape sequences are:\n\n+-------------------+-----------------------------------+---------+\n| Escape Sequence | Meaning | Notes |\n+===================+===================================+=========+\n| ``\\newline`` | Backslash and newline ignored | |\n+-------------------+-----------------------------------+---------+\n| ``\\\\`` | Backslash (``\\``) | |\n+-------------------+-----------------------------------+---------+\n| ``\\\'`` | Single quote (``\'``) | |\n+-------------------+-----------------------------------+---------+\n| ``\\"`` | Double quote (``"``) | |\n+-------------------+-----------------------------------+---------+\n| ``\\a`` | ASCII Bell (BEL) | |\n+-------------------+-----------------------------------+---------+\n| ``\\b`` | ASCII Backspace (BS) | |\n+-------------------+-----------------------------------+---------+\n| ``\\f`` | ASCII Formfeed (FF) | |\n+-------------------+-----------------------------------+---------+\n| ``\\n`` | ASCII Linefeed (LF) | |\n+-------------------+-----------------------------------+---------+\n| ``\\r`` | ASCII Carriage Return (CR) | |\n+-------------------+-----------------------------------+---------+\n| ``\\t`` | ASCII Horizontal Tab (TAB) | |\n+-------------------+-----------------------------------+---------+\n| ``\\v`` | ASCII Vertical Tab (VT) | |\n+-------------------+-----------------------------------+---------+\n| ``\\ooo`` | Character with octal value *ooo* | (1,3) |\n+-------------------+-----------------------------------+---------+\n| ``\\xhh`` | Character with hex value *hh* | (2,3) |\n+-------------------+-----------------------------------+---------+\n\nEscape sequences only recognized in string literals are:\n\n+-------------------+-----------------------------------+---------+\n| Escape Sequence | Meaning | Notes |\n+===================+===================================+=========+\n| ``\\N{name}`` | Character named *name* in the | |\n| | Unicode database | 
|\n+-------------------+-----------------------------------+---------+\n| ``\\uxxxx`` | Character with 16-bit hex value | (4) |\n| | *xxxx* | |\n+-------------------+-----------------------------------+---------+\n| ``\\Uxxxxxxxx`` | Character with 32-bit hex value | (5) |\n| | *xxxxxxxx* | |\n+-------------------+-----------------------------------+---------+\n\nNotes:\n\n1. As in Standard C, up to three octal digits are accepted.\n\n2. Unlike in Standard C, exactly two hex digits are required.\n\n3. In a bytes literal, hexadecimal and octal escapes denote the byte\n with the given value. In a string literal, these escapes denote a\n Unicode character with the given value.\n\n4. Individual code units which form parts of a surrogate pair can be\n encoded using this escape sequence. Exactly four hex digits are\n required.\n\n5. Any Unicode character can be encoded this way, but characters\n outside the Basic Multilingual Plane (BMP) will be encoded using a\n surrogate pair if Python is compiled to use 16-bit code units (the\n default). Exactly eight hex digits are required.\n\nUnlike Standard C, all unrecognized escape sequences are left in the\nstring unchanged, i.e., *the backslash is left in the string*. (This\nbehavior is useful when debugging: if an escape sequence is mistyped,\nthe resulting output is more easily recognized as broken.) It is also\nimportant to note that the escape sequences only recognized in string\nliterals fall into the category of unrecognized escapes for bytes\nliterals.\n\nEven in a raw string, string quotes can be escaped with a backslash,\nbut the backslash remains in the string; for example, ``r"\\""`` is a\nvalid string literal consisting of two characters: a backslash and a\ndouble quote; ``r"\\"`` is not a valid string literal (even a raw\nstring cannot end in an odd number of backslashes). Specifically, *a\nraw string cannot end in a single backslash* (since the backslash\nwould escape the following quote character). 
Note also that a single\nbackslash followed by a newline is interpreted as those two characters\nas part of the string, *not* as a line continuation.\n',
+ 'specialattrs': '\nSpecial Attributes\n******************\n\nThe implementation adds a few special read-only attributes to several\nobject types, where they are relevant. Some of these are not reported\nby the ``dir()`` built-in function.\n\nobject.__dict__\n\n A dictionary or other mapping object used to store an object\'s\n (writable) attributes.\n\ninstance.__class__\n\n The class to which a class instance belongs.\n\nclass.__bases__\n\n The tuple of base classes of a class object.\n\nclass.__name__\n\n The name of the class or type.\n\nclass.__qualname__\n\n The *qualified name* of the class or type.\n\n New in version 3.3.\n\nclass.__mro__\n\n This attribute is a tuple of classes that are considered when\n looking for base classes during method resolution.\n\nclass.mro()\n\n This method can be overridden by a metaclass to customize the\n method resolution order for its instances. It is called at class\n instantiation, and its result is stored in ``__mro__``.\n\nclass.__subclasses__()\n\n Each class keeps a list of weak references to its immediate\n subclasses. This method returns a list of all those references\n still alive. Example:\n\n >>> int.__subclasses__()\n [<class \'bool\'>]\n\n-[ Footnotes ]-\n\n[1] Additional information on these special methods may be found in\n the Python Reference Manual (*Basic customization*).\n\n[2] As a consequence, the list ``[1, 2]`` is considered equal to\n ``[1.0, 2.0]``, and similarly for tuples.\n\n[3] They must have since the parser can\'t tell the type of the\n operands.\n\n[4] Cased characters are those with general category property being\n one of "Lu" (Letter, uppercase), "Ll" (Letter, lowercase), or "Lt"\n (Letter, titlecase).\n\n[5] To format only a tuple you should therefore provide a singleton\n tuple whose only element is the tuple to be formatted.\n',
+ 'specialnames': '\nSpecial method names\n********************\n\nA class can implement certain operations that are invoked by special\nsyntax (such as arithmetic operations or subscripting and slicing) by\ndefining methods with special names. This is Python\'s approach to\n*operator overloading*, allowing classes to define their own behavior\nwith respect to language operators. For instance, if a class defines\na method named ``__getitem__()``, and ``x`` is an instance of this\nclass, then ``x[i]`` is roughly equivalent to ``type(x).__getitem__(x,\ni)``. Except where mentioned, attempts to execute an operation raise\nan exception when no appropriate method is defined (typically\n``AttributeError`` or ``TypeError``).\n\nWhen implementing a class that emulates any built-in type, it is\nimportant that the emulation only be implemented to the degree that it\nmakes sense for the object being modelled. For example, some\nsequences may work well with retrieval of individual elements, but\nextracting a slice may not make sense. (One example of this is the\n``NodeList`` interface in the W3C\'s Document Object Model.)\n\n\nBasic customization\n===================\n\nobject.__new__(cls[, ...])\n\n Called to create a new instance of class *cls*. ``__new__()`` is a\n static method (special-cased so you need not declare it as such)\n that takes the class of which an instance was requested as its\n first argument. The remaining arguments are those passed to the\n object constructor expression (the call to the class). 
The return\n value of ``__new__()`` should be the new object instance (usually\n an instance of *cls*).\n\n Typical implementations create a new instance of the class by\n invoking the superclass\'s ``__new__()`` method using\n ``super(currentclass, cls).__new__(cls[, ...])`` with appropriate\n arguments and then modifying the newly-created instance as\n necessary before returning it.\n\n If ``__new__()`` returns an instance of *cls*, then the new\n instance\'s ``__init__()`` method will be invoked like\n ``__init__(self[, ...])``, where *self* is the new instance and the\n remaining arguments are the same as were passed to ``__new__()``.\n\n If ``__new__()`` does not return an instance of *cls*, then the new\n instance\'s ``__init__()`` method will not be invoked.\n\n ``__new__()`` is intended mainly to allow subclasses of immutable\n types (like int, str, or tuple) to customize instance creation. It\n is also commonly overridden in custom metaclasses in order to\n customize class creation.\n\nobject.__init__(self[, ...])\n\n Called when the instance is created. The arguments are those\n passed to the class constructor expression. If a base class has an\n ``__init__()`` method, the derived class\'s ``__init__()`` method,\n if any, must explicitly call it to ensure proper initialization of\n the base class part of the instance; for example:\n ``BaseClass.__init__(self, [args...])``. As a special constraint\n on constructors, no value may be returned; doing so will cause a\n ``TypeError`` to be raised at runtime.\n\nobject.__del__(self)\n\n Called when the instance is about to be destroyed. This is also\n called a destructor. If a base class has a ``__del__()`` method,\n the derived class\'s ``__del__()`` method, if any, must explicitly\n call it to ensure proper deletion of the base class part of the\n instance. Note that it is possible (though not recommended!) 
for\n the ``__del__()`` method to postpone destruction of the instance by\n creating a new reference to it. It may then be called at a later\n time when this new reference is deleted. It is not guaranteed that\n ``__del__()`` methods are called for objects that still exist when\n the interpreter exits.\n\n Note: ``del x`` doesn\'t directly call ``x.__del__()`` --- the former\n decrements the reference count for ``x`` by one, and the latter\n is only called when ``x``\'s reference count reaches zero. Some\n common situations that may prevent the reference count of an\n object from going to zero include: circular references between\n objects (e.g., a doubly-linked list or a tree data structure with\n parent and child pointers); a reference to the object on the\n stack frame of a function that caught an exception (the traceback\n stored in ``sys.exc_info()[2]`` keeps the stack frame alive); or\n a reference to the object on the stack frame that raised an\n unhandled exception in interactive mode (the traceback stored in\n ``sys.last_traceback`` keeps the stack frame alive). The first\n situation can only be remedied by explicitly breaking the cycles;\n the latter two situations can be resolved by storing ``None`` in\n ``sys.last_traceback``. Circular references which are garbage are\n detected when the optional cycle detector is enabled (it\'s on by\n default), but can only be cleaned up if there are no Python-\n level ``__del__()`` methods involved. 
Refer to the documentation\n for the ``gc`` module for more information about how\n ``__del__()`` methods are handled by the cycle detector,\n particularly the description of the ``garbage`` value.\n\n Warning: Due to the precarious circumstances under which ``__del__()``\n methods are invoked, exceptions that occur during their execution\n are ignored, and a warning is printed to ``sys.stderr`` instead.\n Also, when ``__del__()`` is invoked in response to a module being\n deleted (e.g., when execution of the program is done), other\n globals referenced by the ``__del__()`` method may already have\n been deleted or in the process of being torn down (e.g. the\n import machinery shutting down). For this reason, ``__del__()``\n methods should do the absolute minimum needed to maintain\n external invariants. Starting with version 1.5, Python\n guarantees that globals whose name begins with a single\n underscore are deleted from their module before other globals are\n deleted; if no other references to such globals exist, this may\n help in assuring that imported modules are still available at the\n time when the ``__del__()`` method is called.\n\nobject.__repr__(self)\n\n Called by the ``repr()`` built-in function to compute the\n "official" string representation of an object. If at all possible,\n this should look like a valid Python expression that could be used\n to recreate an object with the same value (given an appropriate\n environment). If this is not possible, a string of the form\n ``<...some useful description...>`` should be returned. The return\n value must be a string object. 
If a class defines ``__repr__()``\n but not ``__str__()``, then ``__repr__()`` is also used when an\n "informal" string representation of instances of that class is\n required.\n\n This is typically used for debugging, so it is important that the\n representation is information-rich and unambiguous.\n\nobject.__str__(self)\n\n Called by the ``str()`` built-in function and by the ``print()``\n function to compute the "informal" string representation of an\n object. This differs from ``__repr__()`` in that it does not have\n to be a valid Python expression: a more convenient or concise\n representation may be used instead. The return value must be a\n string object.\n\nobject.__bytes__(self)\n\n Called by ``bytes()`` to compute a byte-string representation of an\n object. This should return a ``bytes`` object.\n\nobject.__format__(self, format_spec)\n\n Called by the ``format()`` built-in function (and by extension, the\n ``format()`` method of class ``str``) to produce a "formatted"\n string representation of an object. The ``format_spec`` argument is\n a string that contains a description of the formatting options\n desired. The interpretation of the ``format_spec`` argument is up\n to the type implementing ``__format__()``, however most classes\n will either delegate formatting to one of the built-in types, or\n use a similar formatting option syntax.\n\n See *Format Specification Mini-Language* for a description of the\n standard formatting syntax.\n\n The return value must be a string object.\n\nobject.__lt__(self, other)\nobject.__le__(self, other)\nobject.__eq__(self, other)\nobject.__ne__(self, other)\nobject.__gt__(self, other)\nobject.__ge__(self, other)\n\n These are the so-called "rich comparison" methods. 
The\n correspondence between operator symbols and method names is as\n follows: ``x<y`` calls ``x.__lt__(y)``, ``x<=y`` calls\n ``x.__le__(y)``, ``x==y`` calls ``x.__eq__(y)``, ``x!=y`` calls\n ``x.__ne__(y)``, ``x>y`` calls ``x.__gt__(y)``, and ``x>=y`` calls\n ``x.__ge__(y)``.\n\n A rich comparison method may return the singleton\n ``NotImplemented`` if it does not implement the operation for a\n given pair of arguments. By convention, ``False`` and ``True`` are\n returned for a successful comparison. However, these methods can\n return any value, so if the comparison operator is used in a\n Boolean context (e.g., in the condition of an ``if`` statement),\n Python will call ``bool()`` on the value to determine if the result\n is true or false.\n\n There are no implied relationships among the comparison operators.\n The truth of ``x==y`` does not imply that ``x!=y`` is false.\n Accordingly, when defining ``__eq__()``, one should also define\n ``__ne__()`` so that the operators will behave as expected. See\n the paragraph on ``__hash__()`` for some important notes on\n creating *hashable* objects which support custom comparison\n operations and are usable as dictionary keys.\n\n There are no swapped-argument versions of these methods (to be used\n when the left argument does not support the operation but the right\n argument does); rather, ``__lt__()`` and ``__gt__()`` are each\n other\'s reflection, ``__le__()`` and ``__ge__()`` are each other\'s\n reflection, and ``__eq__()`` and ``__ne__()`` are their own\n reflection.\n\n Arguments to rich comparison methods are never coerced.\n\n To automatically generate ordering operations from a single root\n operation, see ``functools.total_ordering()``.\n\nobject.__hash__(self)\n\n Called by built-in function ``hash()`` and for operations on\n members of hashed collections including ``set``, ``frozenset``, and\n ``dict``. ``__hash__()`` should return an integer. 
The only\n required property is that objects which compare equal have the same\n hash value; it is advised to somehow mix together (e.g. using\n exclusive or) the hash values for the components of the object that\n also play a part in comparison of objects.\n\n If a class does not define an ``__eq__()`` method it should not\n define a ``__hash__()`` operation either; if it defines\n ``__eq__()`` but not ``__hash__()``, its instances will not be\n usable as items in hashable collections. If a class defines\n mutable objects and implements an ``__eq__()`` method, it should\n not implement ``__hash__()``, since the implementation of hashable\n collections requires that a key\'s hash value is immutable (if the\n object\'s hash value changes, it will be in the wrong hash bucket).\n\n User-defined classes have ``__eq__()`` and ``__hash__()`` methods\n by default; with them, all objects compare unequal (except with\n themselves) and ``x.__hash__()`` returns an appropriate value such\n that ``x == y`` implies both that ``x is y`` and ``hash(x) ==\n hash(y)``.\n\n Classes which inherit a ``__hash__()`` method from a parent class\n but change the meaning of ``__eq__()`` such that the hash value\n returned is no longer appropriate (e.g. by switching to a value-\n based concept of equality instead of the default identity based\n equality) can explicitly flag themselves as being unhashable by\n setting ``__hash__ = None`` in the class definition. 
Doing so means\n that not only will instances of the class raise an appropriate\n ``TypeError`` when a program attempts to retrieve their hash value,\n but they will also be correctly identified as unhashable when\n checking ``isinstance(obj, collections.Hashable)`` (unlike classes\n which define their own ``__hash__()`` to explicitly raise\n ``TypeError``).\n\n If a class that overrides ``__eq__()`` needs to retain the\n implementation of ``__hash__()`` from a parent class, the\n interpreter must be told this explicitly by setting ``__hash__ =\n <ParentClass>.__hash__``. Otherwise the inheritance of\n ``__hash__()`` will be blocked, just as if ``__hash__`` had been\n explicitly set to ``None``.\n\n Note: By default, the ``__hash__()`` values of str, bytes and datetime\n objects are "salted" with an unpredictable random value.\n Although they remain constant within an individual Python\n process, they are not predictable between repeated invocations of\n Python. This is intended to provide protection against a denial-\n of-service caused by carefully-chosen inputs that exploit the\n worst case performance of a dict insertion, O(n^2) complexity.\n See http://www.ocert.org/advisories/ocert-2011-003.html for\n details. Changing hash values affects the iteration order of\n dicts, sets and other mappings. Python has never made guarantees\n about this ordering (and it typically varies between 32-bit and\n 64-bit builds). See also ``PYTHONHASHSEED``.\n\n Changed in version 3.3: Hash randomization is enabled by default.\n\nobject.__bool__(self)\n\n Called to implement truth value testing and the built-in operation\n ``bool()``; should return ``False`` or ``True``. When this method\n is not defined, ``__len__()`` is called, if it is defined, and the\n object is considered true if its result is nonzero. 
If a class\n defines neither ``__len__()`` nor ``__bool__()``, all its instances\n are considered true.\n\n\nCustomizing attribute access\n============================\n\nThe following methods can be defined to customize the meaning of\nattribute access (use of, assignment to, or deletion of ``x.name``)\nfor class instances.\n\nobject.__getattr__(self, name)\n\n Called when an attribute lookup has not found the attribute in the\n usual places (i.e. it is not an instance attribute nor is it found\n in the class tree for ``self``). ``name`` is the attribute name.\n This method should return the (computed) attribute value or raise\n an ``AttributeError`` exception.\n\n Note that if the attribute is found through the normal mechanism,\n ``__getattr__()`` is not called. (This is an intentional asymmetry\n between ``__getattr__()`` and ``__setattr__()``.) This is done both\n for efficiency reasons and because otherwise ``__getattr__()``\n would have no way to access other attributes of the instance. Note\n that at least for instance variables, you can fake total control by\n not inserting any values in the instance attribute dictionary (but\n instead inserting them in another object). See the\n ``__getattribute__()`` method below for a way to actually get total\n control over attribute access.\n\nobject.__getattribute__(self, name)\n\n Called unconditionally to implement attribute accesses for\n instances of the class. If the class also defines\n ``__getattr__()``, the latter will not be called unless\n ``__getattribute__()`` either calls it explicitly or raises an\n ``AttributeError``. This method should return the (computed)\n attribute value or raise an ``AttributeError`` exception. 
In order\n to avoid infinite recursion in this method, its implementation\n should always call the base class method with the same name to\n access any attributes it needs, for example,\n ``object.__getattribute__(self, name)``.\n\n Note: This method may still be bypassed when looking up special methods\n as the result of implicit invocation via language syntax or\n built-in functions. See *Special method lookup*.\n\nobject.__setattr__(self, name, value)\n\n Called when an attribute assignment is attempted. This is called\n instead of the normal mechanism (i.e. store the value in the\n instance dictionary). *name* is the attribute name, *value* is the\n value to be assigned to it.\n\n If ``__setattr__()`` wants to assign to an instance attribute, it\n should call the base class method with the same name, for example,\n ``object.__setattr__(self, name, value)``.\n\nobject.__delattr__(self, name)\n\n Like ``__setattr__()`` but for attribute deletion instead of\n assignment. This should only be implemented if ``del obj.name`` is\n meaningful for the object.\n\nobject.__dir__(self)\n\n Called when ``dir()`` is called on the object. A sequence must be\n returned. ``dir()`` converts the returned sequence to a list and\n sorts it.\n\n\nImplementing Descriptors\n------------------------\n\nThe following methods only apply when an instance of the class\ncontaining the method (a so-called *descriptor* class) appears in an\n*owner* class (the descriptor must be in either the owner\'s class\ndictionary or in the class dictionary for one of its parents). In the\nexamples below, "the attribute" refers to the attribute whose name is\nthe key of the property in the owner class\' ``__dict__``.\n\nobject.__get__(self, instance, owner)\n\n Called to get the attribute of the owner class (class attribute\n access) or of an instance of that class (instance attribute\n access). 
*owner* is always the owner class, while *instance* is the\n instance that the attribute was accessed through, or ``None`` when\n the attribute is accessed through the *owner*. This method should\n return the (computed) attribute value or raise an\n ``AttributeError`` exception.\n\nobject.__set__(self, instance, value)\n\n Called to set the attribute on an instance *instance* of the owner\n class to a new value, *value*.\n\nobject.__delete__(self, instance)\n\n Called to delete the attribute on an instance *instance* of the\n owner class.\n\n\nInvoking Descriptors\n--------------------\n\nIn general, a descriptor is an object attribute with "binding\nbehavior", one whose attribute access has been overridden by methods\nin the descriptor protocol: ``__get__()``, ``__set__()``, and\n``__delete__()``. If any of those methods are defined for an object,\nit is said to be a descriptor.\n\nThe default behavior for attribute access is to get, set, or delete\nthe attribute from an object\'s dictionary. For instance, ``a.x`` has a\nlookup chain starting with ``a.__dict__[\'x\']``, then\n``type(a).__dict__[\'x\']``, and continuing through the base classes of\n``type(a)`` excluding metaclasses.\n\nHowever, if the looked-up value is an object defining one of the\ndescriptor methods, then Python may override the default behavior and\ninvoke the descriptor method instead. 
Where this occurs in the\nprecedence chain depends on which descriptor methods were defined and\nhow they were called.\n\nThe starting point for descriptor invocation is a binding, ``a.x``.\nHow the arguments are assembled depends on ``a``:\n\nDirect Call\n The simplest and least common call is when user code directly\n invokes a descriptor method: ``x.__get__(a)``.\n\nInstance Binding\n If binding to an object instance, ``a.x`` is transformed into the\n call: ``type(a).__dict__[\'x\'].__get__(a, type(a))``.\n\nClass Binding\n If binding to a class, ``A.x`` is transformed into the call:\n ``A.__dict__[\'x\'].__get__(None, A)``.\n\nSuper Binding\n If ``a`` is an instance of ``super``, then the binding ``super(B,\n obj).m()`` searches ``obj.__class__.__mro__`` for the base class\n ``A`` immediately preceding ``B`` and then invokes the descriptor\n with the call: ``A.__dict__[\'m\'].__get__(obj, obj.__class__)``.\n\nFor instance bindings, the precedence of descriptor invocation depends\non which descriptor methods are defined. A descriptor can define\nany combination of ``__get__()``, ``__set__()`` and ``__delete__()``.\nIf it does not define ``__get__()``, then accessing the attribute will\nreturn the descriptor object itself unless there is a value in the\nobject\'s instance dictionary. If the descriptor defines ``__set__()``\nand/or ``__delete__()``, it is a data descriptor; if it defines\nneither, it is a non-data descriptor. Normally, data descriptors\ndefine both ``__get__()`` and ``__set__()``, while non-data\ndescriptors have just the ``__get__()`` method. Data descriptors with\n``__set__()`` and ``__get__()`` defined always override a redefinition\nin an instance dictionary. In contrast, non-data descriptors can be\noverridden by instances.\n\nPython methods (including ``staticmethod()`` and ``classmethod()``)\nare implemented as non-data descriptors. Accordingly, instances can\nredefine and override methods. 
This allows individual instances to\nacquire behaviors that differ from other instances of the same class.\n\nThe ``property()`` function is implemented as a data descriptor.\nAccordingly, instances cannot override the behavior of a property.\n\n\n__slots__\n---------\n\nBy default, instances of classes have a dictionary for attribute\nstorage. This wastes space for objects having very few instance\nvariables. The space consumption can become acute when creating large\nnumbers of instances.\n\nThe default can be overridden by defining *__slots__* in a class\ndefinition. The *__slots__* declaration takes a sequence of instance\nvariables and reserves just enough space in each instance to hold a\nvalue for each variable. Space is saved because *__dict__* is not\ncreated for each instance.\n\nobject.__slots__\n\n This class variable can be assigned a string, iterable, or sequence\n of strings with variable names used by instances. If defined in a\n class, *__slots__* reserves space for the declared variables and\n prevents the automatic creation of *__dict__* and *__weakref__* for\n each instance.\n\n\nNotes on using *__slots__*\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n* When inheriting from a class without *__slots__*, the *__dict__*\n attribute of that class will always be accessible, so a *__slots__*\n definition in the subclass is meaningless.\n\n* Without a *__dict__* variable, instances cannot be assigned new\n variables not listed in the *__slots__* definition. Attempts to\n assign to an unlisted variable name raises ``AttributeError``. If\n dynamic assignment of new variables is desired, then add\n ``\'__dict__\'`` to the sequence of strings in the *__slots__*\n declaration.\n\n* Without a *__weakref__* variable for each instance, classes defining\n *__slots__* do not support weak references to its instances. 
If weak\n reference support is needed, then add ``\'__weakref__\'`` to the\n sequence of strings in the *__slots__* declaration.\n\n* *__slots__* are implemented at the class level by creating\n descriptors (*Implementing Descriptors*) for each variable name. As\n a result, class attributes cannot be used to set default values for\n instance variables defined by *__slots__*; otherwise, the class\n attribute would overwrite the descriptor assignment.\n\n* The action of a *__slots__* declaration is limited to the class\n where it is defined. As a result, subclasses will have a *__dict__*\n unless they also define *__slots__* (which must only contain names\n of any *additional* slots).\n\n* If a class defines a slot also defined in a base class, the instance\n variable defined by the base class slot is inaccessible (except by\n retrieving its descriptor directly from the base class). This\n renders the meaning of the program undefined. In the future, a\n check may be added to prevent this.\n\n* Nonempty *__slots__* does not work for classes derived from\n "variable-length" built-in types such as ``int``, ``str`` and\n ``tuple``.\n\n* Any non-string iterable may be assigned to *__slots__*. Mappings may\n also be used; however, in the future, special meaning may be\n assigned to the values corresponding to each key.\n\n* *__class__* assignment works only if both classes have the same\n *__slots__*.\n\n\nCustomizing class creation\n==========================\n\nBy default, classes are constructed using ``type()``. The class body\nis executed in a new namespace and the class name is bound locally to\nthe result of ``type(name, bases, namespace)``.\n\nThe class creation process can be customised by passing the\n``metaclass`` keyword argument in the class definition line, or by\ninheriting from an existing class that included such an argument. 
In\nthe following example, both ``MyClass`` and ``MySubclass`` are\ninstances of ``Meta``:\n\n class Meta(type):\n pass\n\n class MyClass(metaclass=Meta):\n pass\n\n class MySubclass(MyClass):\n pass\n\nAny other keyword arguments that are specified in the class definition\nare passed through to all metaclass operations described below.\n\nWhen a class definition is executed, the following steps occur:\n\n* the appropriate metaclass is determined\n\n* the class namespace is prepared\n\n* the class body is executed\n\n* the class object is created\n\n\nDetermining the appropriate metaclass\n-------------------------------------\n\nThe appropriate metaclass for a class definition is determined as\nfollows:\n\n* if no bases and no explicit metaclass are given, then ``type()`` is\n used\n\n* if an explicit metaclass is given and it is *not* an instance of\n ``type()``, then it is used directly as the metaclass\n\n* if an instance of ``type()`` is given as the explicit metaclass, or\n bases are defined, then the most derived metaclass is used\n\nThe most derived metaclass is selected from the explicitly specified\nmetaclass (if any) and the metaclasses (i.e. ``type(cls)``) of all\nspecified base classes. The most derived metaclass is one which is a\nsubtype of *all* of these candidate metaclasses. If none of the\ncandidate metaclasses meets that criterion, then the class definition\nwill fail with ``TypeError``.\n\n\nPreparing the class namespace\n-----------------------------\n\nOnce the appropriate metaclass has been identified, then the class\nnamespace is prepared. 
If the metaclass has a ``__prepare__``\nattribute, it is called as ``namespace = metaclass.__prepare__(name,\nbases, **kwds)`` (where the additional keyword arguments, if any, come\nfrom the class definition).\n\nIf the metaclass has no ``__prepare__`` attribute, then the class\nnamespace is initialised as an empty ``dict()`` instance.\n\nSee also:\n\n **PEP 3115** - Metaclasses in Python 3000\n Introduced the ``__prepare__`` namespace hook\n\n\nExecuting the class body\n------------------------\n\nThe class body is executed (approximately) as ``exec(body, globals(),\nnamespace)``. The key difference from a normal call to ``exec()`` is\nthat lexical scoping allows the class body (including any methods) to\nreference names from the current and outer scopes when the class\ndefinition occurs inside a function.\n\nHowever, even when the class definition occurs inside the function,\nmethods defined inside the class still cannot see names defined at the\nclass scope. Class variables must be accessed through the first\nparameter of instance or class methods, and cannot be accessed at all\nfrom static methods.\n\n\nCreating the class object\n-------------------------\n\nOnce the class namespace has been populated by executing the class\nbody, the class object is created by calling ``metaclass(name, bases,\nnamespace, **kwds)`` (the additional keywords passed here are the same\nas those passed to ``__prepare__``).\n\nThis class object is the one that will be referenced by the zero-\nargument form of ``super()``. ``__class__`` is an implicit closure\nreference created by the compiler if any methods in a class body refer\nto either ``__class__`` or ``super``. 
This allows the zero argument\nform of ``super()`` to correctly identify the class being defined\nbased on lexical scoping, while the class or instance that was used to\nmake the current call is identified based on the first argument passed\nto the method.\n\nAfter the class object is created, it is passed to the class\ndecorators included in the class definition (if any) and the resulting\nobject is bound in the local namespace as the defined class.\n\nSee also:\n\n **PEP 3135** - New super\n Describes the implicit ``__class__`` closure reference\n\n\nMetaclass example\n-----------------\n\nThe potential uses for metaclasses are boundless. Some ideas that have\nbeen explored include logging, interface checking, automatic\ndelegation, automatic property creation, proxies, frameworks, and\nautomatic resource locking/synchronization.\n\nHere is an example of a metaclass that uses a\n``collections.OrderedDict`` to remember the order that class members\nwere defined:\n\n class OrderedClass(type):\n\n @classmethod\n def __prepare__(metacls, name, bases, **kwds):\n return collections.OrderedDict()\n\n def __new__(cls, name, bases, namespace, **kwds):\n result = type.__new__(cls, name, bases, dict(namespace))\n result.members = tuple(namespace)\n return result\n\n class A(metaclass=OrderedClass):\n def one(self): pass\n def two(self): pass\n def three(self): pass\n def four(self): pass\n\n >>> A.members\n (\'__module__\', \'one\', \'two\', \'three\', \'four\')\n\nWhen the class definition for *A* gets executed, the process begins\nwith calling the metaclass\'s ``__prepare__()`` method which returns an\nempty ``collections.OrderedDict``. That mapping records the methods\nand attributes of *A* as they are defined within the body of the class\nstatement. Once those definitions are executed, the ordered dictionary\nis fully populated and the metaclass\'s ``__new__()`` method gets\ninvoked. 
That method builds the new type and it saves the ordered\ndictionary keys in an attribute called ``members``.\n\n\nCustomizing instance and subclass checks\n========================================\n\nThe following methods are used to override the default behavior of the\n``isinstance()`` and ``issubclass()`` built-in functions.\n\nIn particular, the metaclass ``abc.ABCMeta`` implements these methods\nin order to allow the addition of Abstract Base Classes (ABCs) as\n"virtual base classes" to any class or type (including built-in\ntypes), including other ABCs.\n\nclass.__instancecheck__(self, instance)\n\n Return true if *instance* should be considered a (direct or\n indirect) instance of *class*. If defined, called to implement\n ``isinstance(instance, class)``.\n\nclass.__subclasscheck__(self, subclass)\n\n Return true if *subclass* should be considered a (direct or\n indirect) subclass of *class*. If defined, called to implement\n ``issubclass(subclass, class)``.\n\nNote that these methods are looked up on the type (metaclass) of a\nclass. 
They cannot be defined as class methods in the actual class.\nThis is consistent with the lookup of special methods that are called\non instances, only in this case the instance is itself a class.\n\nSee also:\n\n **PEP 3119** - Introducing Abstract Base Classes\n Includes the specification for customizing ``isinstance()`` and\n ``issubclass()`` behavior through ``__instancecheck__()`` and\n ``__subclasscheck__()``, with motivation for this functionality\n in the context of adding Abstract Base Classes (see the ``abc``\n module) to the language.\n\n\nEmulating callable objects\n==========================\n\nobject.__call__(self[, args...])\n\n Called when the instance is "called" as a function; if this method\n is defined, ``x(arg1, arg2, ...)`` is a shorthand for\n ``x.__call__(arg1, arg2, ...)``.\n\n\nEmulating container types\n=========================\n\nThe following methods can be defined to implement container objects.\nContainers usually are sequences (such as lists or tuples) or mappings\n(like dictionaries), but can represent other containers as well. The\nfirst set of methods is used either to emulate a sequence or to\nemulate a mapping; the difference is that for a sequence, the\nallowable keys should be the integers *k* for which ``0 <= k < N``\nwhere *N* is the length of the sequence, or slice objects, which\ndefine a range of items. It is also recommended that mappings provide\nthe methods ``keys()``, ``values()``, ``items()``, ``get()``,\n``clear()``, ``setdefault()``, ``pop()``, ``popitem()``, ``copy()``,\nand ``update()`` behaving similarly to those for Python\'s standard\ndictionary objects. The ``collections`` module provides a\n``MutableMapping`` abstract base class to help create those methods\nfrom a base set of ``__getitem__()``, ``__setitem__()``,\n``__delitem__()``, and ``keys()``. 
Mutable sequences should provide\nmethods ``append()``, ``count()``, ``index()``, ``extend()``,\n``insert()``, ``pop()``, ``remove()``, ``reverse()`` and ``sort()``,\nlike Python standard list objects. Finally, sequence types should\nimplement addition (meaning concatenation) and multiplication (meaning\nrepetition) by defining the methods ``__add__()``, ``__radd__()``,\n``__iadd__()``, ``__mul__()``, ``__rmul__()`` and ``__imul__()``\ndescribed below; they should not define other numerical operators. It\nis recommended that both mappings and sequences implement the\n``__contains__()`` method to allow efficient use of the ``in``\noperator; for mappings, ``in`` should search the mapping\'s keys; for\nsequences, it should search through the values. It is further\nrecommended that both mappings and sequences implement the\n``__iter__()`` method to allow efficient iteration through the\ncontainer; for mappings, ``__iter__()`` should be the same as\n``keys()``; for sequences, it should iterate through the values.\n\nobject.__len__(self)\n\n Called to implement the built-in function ``len()``. Should return\n the length of the object, an integer ``>=`` 0. Also, an object\n that doesn\'t define a ``__bool__()`` method and whose ``__len__()``\n method returns zero is considered to be false in a Boolean context.\n\nNote: Slicing is done exclusively with the following three methods. A\n call like\n\n a[1:2] = b\n\n is translated to\n\n a[slice(1, 2, None)] = b\n\n and so forth. Missing slice items are always filled in with\n ``None``.\n\nobject.__getitem__(self, key)\n\n Called to implement evaluation of ``self[key]``. For sequence\n types, the accepted keys should be integers and slice objects.\n Note that the special interpretation of negative indexes (if the\n class wishes to emulate a sequence type) is up to the\n ``__getitem__()`` method. 
If *key* is of an inappropriate type,\n ``TypeError`` may be raised; if of a value outside the set of\n indexes for the sequence (after any special interpretation of\n negative values), ``IndexError`` should be raised. For mapping\n types, if *key* is missing (not in the container), ``KeyError``\n should be raised.\n\n Note: ``for`` loops expect that an ``IndexError`` will be raised for\n illegal indexes to allow proper detection of the end of the\n sequence.\n\nobject.__setitem__(self, key, value)\n\n Called to implement assignment to ``self[key]``. Same note as for\n ``__getitem__()``. This should only be implemented for mappings if\n the objects support changes to the values for keys, or if new keys\n can be added, or for sequences if elements can be replaced. The\n same exceptions should be raised for improper *key* values as for\n the ``__getitem__()`` method.\n\nobject.__delitem__(self, key)\n\n Called to implement deletion of ``self[key]``. Same note as for\n ``__getitem__()``. This should only be implemented for mappings if\n the objects support removal of keys, or for sequences if elements\n can be removed from the sequence. The same exceptions should be\n raised for improper *key* values as for the ``__getitem__()``\n method.\n\nobject.__iter__(self)\n\n This method is called when an iterator is required for a container.\n This method should return a new iterator object that can iterate\n over all the objects in the container. For mappings, it should\n iterate over the keys of the container, and should also be made\n available as the method ``keys()``.\n\n Iterator objects also need to implement this method; they are\n required to return themselves. For more information on iterator\n objects, see *Iterator Types*.\n\nobject.__reversed__(self)\n\n Called (if present) by the ``reversed()`` built-in to implement\n reverse iteration. 
It should return a new iterator object that\n iterates over all the objects in the container in reverse order.\n\n If the ``__reversed__()`` method is not provided, the\n ``reversed()`` built-in will fall back to using the sequence\n protocol (``__len__()`` and ``__getitem__()``). Objects that\n support the sequence protocol should only provide\n ``__reversed__()`` if they can provide an implementation that is\n more efficient than the one provided by ``reversed()``.\n\nThe membership test operators (``in`` and ``not in``) are normally\nimplemented as an iteration through a sequence. However, container\nobjects can supply the following special method with a more efficient\nimplementation, which also does not require the object be a sequence.\n\nobject.__contains__(self, item)\n\n Called to implement membership test operators. Should return true\n if *item* is in *self*, false otherwise. For mapping objects, this\n should consider the keys of the mapping rather than the values or\n the key-item pairs.\n\n For objects that don\'t define ``__contains__()``, the membership\n test first tries iteration via ``__iter__()``, then the old\n sequence iteration protocol via ``__getitem__()``, see *this\n section in the language reference*.\n\n\nEmulating numeric types\n=======================\n\nThe following methods can be defined to emulate numeric objects.\nMethods corresponding to operations that are not supported by the\nparticular kind of number implemented (e.g., bitwise operations for\nnon-integral numbers) should be left undefined.\n\nobject.__add__(self, other)\nobject.__sub__(self, other)\nobject.__mul__(self, other)\nobject.__truediv__(self, other)\nobject.__floordiv__(self, other)\nobject.__mod__(self, other)\nobject.__divmod__(self, other)\nobject.__pow__(self, other[, modulo])\nobject.__lshift__(self, other)\nobject.__rshift__(self, other)\nobject.__and__(self, other)\nobject.__xor__(self, other)\nobject.__or__(self, other)\n\n These methods are called to 
implement the binary arithmetic\n operations (``+``, ``-``, ``*``, ``/``, ``//``, ``%``,\n ``divmod()``, ``pow()``, ``**``, ``<<``, ``>>``, ``&``, ``^``,\n ``|``). For instance, to evaluate the expression ``x + y``, where\n *x* is an instance of a class that has an ``__add__()`` method,\n ``x.__add__(y)`` is called. The ``__divmod__()`` method should be\n the equivalent to using ``__floordiv__()`` and ``__mod__()``; it\n should not be related to ``__truediv__()``. Note that\n ``__pow__()`` should be defined to accept an optional third\n argument if the ternary version of the built-in ``pow()`` function\n is to be supported.\n\n If one of those methods does not support the operation with the\n supplied arguments, it should return ``NotImplemented``.\n\nobject.__radd__(self, other)\nobject.__rsub__(self, other)\nobject.__rmul__(self, other)\nobject.__rtruediv__(self, other)\nobject.__rfloordiv__(self, other)\nobject.__rmod__(self, other)\nobject.__rdivmod__(self, other)\nobject.__rpow__(self, other)\nobject.__rlshift__(self, other)\nobject.__rrshift__(self, other)\nobject.__rand__(self, other)\nobject.__rxor__(self, other)\nobject.__ror__(self, other)\n\n These methods are called to implement the binary arithmetic\n operations (``+``, ``-``, ``*``, ``/``, ``//``, ``%``,\n ``divmod()``, ``pow()``, ``**``, ``<<``, ``>>``, ``&``, ``^``,\n ``|``) with reflected (swapped) operands. These functions are only\n called if the left operand does not support the corresponding\n operation and the operands are of different types. 
[2] For\n instance, to evaluate the expression ``x - y``, where *y* is an\n instance of a class that has an ``__rsub__()`` method,\n ``y.__rsub__(x)`` is called if ``x.__sub__(y)`` returns\n *NotImplemented*.\n\n Note that ternary ``pow()`` will not try calling ``__rpow__()``\n (the coercion rules would become too complicated).\n\n Note: If the right operand\'s type is a subclass of the left operand\'s\n type and that subclass provides the reflected method for the\n operation, this method will be called before the left operand\'s\n non-reflected method. This behavior allows subclasses to\n override their ancestors\' operations.\n\nobject.__iadd__(self, other)\nobject.__isub__(self, other)\nobject.__imul__(self, other)\nobject.__itruediv__(self, other)\nobject.__ifloordiv__(self, other)\nobject.__imod__(self, other)\nobject.__ipow__(self, other[, modulo])\nobject.__ilshift__(self, other)\nobject.__irshift__(self, other)\nobject.__iand__(self, other)\nobject.__ixor__(self, other)\nobject.__ior__(self, other)\n\n These methods are called to implement the augmented arithmetic\n assignments (``+=``, ``-=``, ``*=``, ``/=``, ``//=``, ``%=``,\n ``**=``, ``<<=``, ``>>=``, ``&=``, ``^=``, ``|=``). These methods\n should attempt to do the operation in-place (modifying *self*) and\n return the result (which could be, but does not have to be,\n *self*). If a specific method is not defined, the augmented\n assignment falls back to the normal methods. For instance, to\n execute the statement ``x += y``, where *x* is an instance of a\n class that has an ``__iadd__()`` method, ``x.__iadd__(y)`` is\n called. 
If *x* is an instance of a class that does not define a\n ``__iadd__()`` method, ``x.__add__(y)`` and ``y.__radd__(x)`` are\n considered, as with the evaluation of ``x + y``.\n\nobject.__neg__(self)\nobject.__pos__(self)\nobject.__abs__(self)\nobject.__invert__(self)\n\n Called to implement the unary arithmetic operations (``-``, ``+``,\n ``abs()`` and ``~``).\n\nobject.__complex__(self)\nobject.__int__(self)\nobject.__float__(self)\nobject.__round__(self[, n])\n\n Called to implement the built-in functions ``complex()``,\n ``int()``, ``float()`` and ``round()``. Should return a value of\n the appropriate type.\n\nobject.__index__(self)\n\n Called to implement ``operator.index()``. Also called whenever\n Python needs an integer object (such as in slicing, or in the\n built-in ``bin()``, ``hex()`` and ``oct()`` functions). Must return\n an integer.\n\n\nWith Statement Context Managers\n===============================\n\nA *context manager* is an object that defines the runtime context to\nbe established when executing a ``with`` statement. The context\nmanager handles the entry into, and the exit from, the desired runtime\ncontext for the execution of the block of code. Context managers are\nnormally invoked using the ``with`` statement (described in section\n*The with statement*), but can also be used by directly invoking their\nmethods.\n\nTypical uses of context managers include saving and restoring various\nkinds of global state, locking and unlocking resources, closing opened\nfiles, etc.\n\nFor more information on context managers, see *Context Manager Types*.\n\nobject.__enter__(self)\n\n Enter the runtime context related to this object. The ``with``\n statement will bind this method\'s return value to the target(s)\n specified in the ``as`` clause of the statement, if any.\n\nobject.__exit__(self, exc_type, exc_value, traceback)\n\n Exit the runtime context related to this object. The parameters\n describe the exception that caused the context to be exited. 
If the\n context was exited without an exception, all three arguments will\n be ``None``.\n\n If an exception is supplied, and the method wishes to suppress the\n exception (i.e., prevent it from being propagated), it should\n return a true value. Otherwise, the exception will be processed\n normally upon exit from this method.\n\n Note that ``__exit__()`` methods should not reraise the passed-in\n exception; this is the caller\'s responsibility.\n\nSee also:\n\n **PEP 0343** - The "with" statement\n The specification, background, and examples for the Python\n ``with`` statement.\n\n\nSpecial method lookup\n=====================\n\nFor custom classes, implicit invocations of special methods are only\nguaranteed to work correctly if defined on an object\'s type, not in\nthe object\'s instance dictionary. That behaviour is the reason why\nthe following code raises an exception:\n\n >>> class C:\n ... pass\n ...\n >>> c = C()\n >>> c.__len__ = lambda: 5\n >>> len(c)\n Traceback (most recent call last):\n File "<stdin>", line 1, in <module>\n TypeError: object of type \'C\' has no len()\n\nThe rationale behind this behaviour lies with a number of special\nmethods such as ``__hash__()`` and ``__repr__()`` that are implemented\nby all objects, including type objects. 
If the implicit lookup of\nthese methods used the conventional lookup process, they would fail\nwhen invoked on the type object itself:\n\n >>> 1 .__hash__() == hash(1)\n True\n >>> int.__hash__() == hash(int)\n Traceback (most recent call last):\n File "<stdin>", line 1, in <module>\n TypeError: descriptor \'__hash__\' of \'int\' object needs an argument\n\nIncorrectly attempting to invoke an unbound method of a class in this\nway is sometimes referred to as \'metaclass confusion\', and is avoided\nby bypassing the instance when looking up special methods:\n\n >>> type(1).__hash__(1) == hash(1)\n True\n >>> type(int).__hash__(int) == hash(int)\n True\n\nIn addition to bypassing any instance attributes in the interest of\ncorrectness, implicit special method lookup generally also bypasses\nthe ``__getattribute__()`` method even of the object\'s metaclass:\n\n >>> class Meta(type):\n ... def __getattribute__(*args):\n ... print("Metaclass getattribute invoked")\n ... return type.__getattribute__(*args)\n ...\n >>> class C(object, metaclass=Meta):\n ... def __len__(self):\n ... return 10\n ... def __getattribute__(*args):\n ... print("Class getattribute invoked")\n ... return object.__getattribute__(*args)\n ...\n >>> c = C()\n >>> c.__len__() # Explicit lookup via instance\n Class getattribute invoked\n 10\n >>> type(c).__len__(c) # Explicit lookup via type\n Metaclass getattribute invoked\n 10\n >>> len(c) # Implicit lookup\n 10\n\nBypassing the ``__getattribute__()`` machinery in this fashion\nprovides significant scope for speed optimisations within the\ninterpreter, at the cost of some flexibility in the handling of\nspecial methods (the special method *must* be set on the class object\nitself in order to be consistently invoked by the interpreter).\n\n-[ Footnotes ]-\n\n[1] It *is* possible in some cases to change an object\'s type, under\n certain controlled conditions. 
It generally isn\'t a good idea\n though, since it can lead to some very strange behaviour if it is\n handled incorrectly.\n\n[2] For operands of the same type, it is assumed that if the non-\n reflected method (such as ``__add__()``) fails the operation is\n not supported, which is why the reflected method is not called.\n',
+ 'string-methods': '\nString Methods\n**************\n\nStrings implement all of the *common* sequence operations, along with\nthe additional methods described below.\n\nStrings also support two styles of string formatting, one providing a\nlarge degree of flexibility and customization (see ``str.format()``,\n*Format String Syntax* and *String Formatting*) and the other based on\nC ``printf`` style formatting that handles a narrower range of types\nand is slightly harder to use correctly, but is often faster for the\ncases it can handle (*printf-style String Formatting*).\n\nThe *Text Processing Services* section of the standard library covers\na number of other modules that provide various text related utilities\n(including regular expression support in the ``re`` module).\n\nstr.capitalize()\n\n Return a copy of the string with its first character capitalized\n and the rest lowercased.\n\nstr.casefold()\n\n Return a casefolded copy of the string. Casefolded strings may be\n used for caseless matching.\n\n Casefolding is similar to lowercasing but more aggressive because\n it is intended to remove all case distinctions in a string. For\n example, the German lowercase letter ``\'\xc3\x9f\'`` is equivalent to\n ``"ss"``. Since it is already lowercase, ``lower()`` would do\n nothing to ``\'\xc3\x9f\'``; ``casefold()`` converts it to ``"ss"``.\n\n The casefolding algorithm is described in section 3.13 of the\n Unicode Standard.\n\n New in version 3.3.\n\nstr.center(width[, fillchar])\n\n Return the string centered in a string of length *width*. Padding is done\n using the specified *fillchar* (default is a space).\n\nstr.count(sub[, start[, end]])\n\n Return the number of non-overlapping occurrences of substring *sub*\n in the range [*start*, *end*]. Optional arguments *start* and\n *end* are interpreted as in slice notation.\n\nstr.encode(encoding="utf-8", errors="strict")\n\n Return an encoded version of the string as a bytes object. Default\n encoding is ``\'utf-8\'``. 
*errors* may be given to set a different\n error handling scheme. The default for *errors* is ``\'strict\'``,\n meaning that encoding errors raise a ``UnicodeError``. Other\n possible values are ``\'ignore\'``, ``\'replace\'``,\n ``\'xmlcharrefreplace\'``, ``\'backslashreplace\'`` and any other name\n registered via ``codecs.register_error()``, see section *Codec Base\n Classes*. For a list of possible encodings, see section *Standard\n Encodings*.\n\n Changed in version 3.1: Support for keyword arguments added.\n\nstr.endswith(suffix[, start[, end]])\n\n Return ``True`` if the string ends with the specified *suffix*,\n otherwise return ``False``. *suffix* can also be a tuple of\n suffixes to look for. With optional *start*, test beginning at\n that position. With optional *end*, stop comparing at that\n position.\n\nstr.expandtabs([tabsize])\n\n Return a copy of the string where all tab characters are replaced\n by zero or more spaces, depending on the current column and the\n given tab size. The column number is reset to zero after each\n newline occurring in the string. If *tabsize* is not given, a tab\n size of ``8`` characters is assumed. This doesn\'t understand other\n non-printing characters or escape sequences.\n\nstr.find(sub[, start[, end]])\n\n Return the lowest index in the string where substring *sub* is\n found, such that *sub* is contained in the slice ``s[start:end]``.\n Optional arguments *start* and *end* are interpreted as in slice\n notation. Return ``-1`` if *sub* is not found.\n\n Note: The ``find()`` method should be used only if you need to know the\n position of *sub*. To check if *sub* is a substring or not, use\n the ``in`` operator:\n\n >>> \'Py\' in \'Python\'\n True\n\nstr.format(*args, **kwargs)\n\n Perform a string formatting operation. The string on which this\n method is called can contain literal text or replacement fields\n delimited by braces ``{}``. 
Each replacement field contains either\n the numeric index of a positional argument, or the name of a\n keyword argument. Returns a copy of the string where each\n replacement field is replaced with the string value of the\n corresponding argument.\n\n >>> "The sum of 1 + 2 is {0}".format(1+2)\n \'The sum of 1 + 2 is 3\'\n\n See *Format String Syntax* for a description of the various\n formatting options that can be specified in format strings.\n\nstr.format_map(mapping)\n\n Similar to ``str.format(**mapping)``, except that ``mapping`` is\n used directly and not copied to a ``dict``. This is useful if, for\n example, ``mapping`` is a dict subclass:\n\n >>> class Default(dict):\n ... def __missing__(self, key):\n ... return key\n ...\n >>> \'{name} was born in {country}\'.format_map(Default(name=\'Guido\'))\n \'Guido was born in country\'\n\n New in version 3.2.\n\nstr.index(sub[, start[, end]])\n\n Like ``find()``, but raise ``ValueError`` when the substring is not\n found.\n\nstr.isalnum()\n\n Return true if all characters in the string are alphanumeric and\n there is at least one character, false otherwise. A character\n ``c`` is alphanumeric if one of the following returns ``True``:\n ``c.isalpha()``, ``c.isdecimal()``, ``c.isdigit()``, or\n ``c.isnumeric()``.\n\nstr.isalpha()\n\n Return true if all characters in the string are alphabetic and\n there is at least one character, false otherwise. Alphabetic\n characters are those characters defined in the Unicode character\n database as "Letter", i.e., those with general category property\n being one of "Lm", "Lt", "Lu", "Ll", or "Lo". Note that this is\n different from the "Alphabetic" property defined in the Unicode\n Standard.\n\nstr.isdecimal()\n\n Return true if all characters in the string are decimal characters\n and there is at least one character, false otherwise. Decimal\n characters are those from general category "Nd". 
This category\n includes digit characters, and all characters that can be used to\n form decimal-radix numbers, e.g. U+0660, ARABIC-INDIC DIGIT ZERO.\n\nstr.isdigit()\n\n Return true if all characters in the string are digits and there is\n at least one character, false otherwise. Digits include decimal\n characters and digits that need special handling, such as the\n compatibility superscript digits. Formally, a digit is a character\n that has the property value Numeric_Type=Digit or\n Numeric_Type=Decimal.\n\nstr.isidentifier()\n\n Return true if the string is a valid identifier according to the\n language definition, section *Identifiers and keywords*.\n\nstr.islower()\n\n Return true if all cased characters [4] in the string are lowercase\n and there is at least one cased character, false otherwise.\n\nstr.isnumeric()\n\n Return true if all characters in the string are numeric characters,\n and there is at least one character, false otherwise. Numeric\n characters include digit characters, and all characters that have\n the Unicode numeric value property, e.g. U+2155, VULGAR FRACTION\n ONE FIFTH. Formally, numeric characters are those with the\n property value Numeric_Type=Digit, Numeric_Type=Decimal or\n Numeric_Type=Numeric.\n\nstr.isprintable()\n\n Return true if all characters in the string are printable or the\n string is empty, false otherwise. Nonprintable characters are\n those characters defined in the Unicode character database as\n "Other" or "Separator", excepting the ASCII space (0x20) which is\n considered printable. (Note that printable characters in this\n context are those which should not be escaped when ``repr()`` is\n invoked on a string. It has no bearing on the handling of strings\n written to ``sys.stdout`` or ``sys.stderr``.)\n\nstr.isspace()\n\n Return true if there are only whitespace characters in the string\n and there is at least one character, false otherwise. 
Whitespace\n characters are those characters defined in the Unicode character\n database as "Other" or "Separator" and those with bidirectional\n property being one of "WS", "B", or "S".\n\nstr.istitle()\n\n Return true if the string is a titlecased string and there is at\n least one character, for example uppercase characters may only\n follow uncased characters and lowercase characters only cased ones.\n Return false otherwise.\n\nstr.isupper()\n\n Return true if all cased characters [4] in the string are uppercase\n and there is at least one cased character, false otherwise.\n\nstr.join(iterable)\n\n Return a string which is the concatenation of the strings in\n *iterable*. A ``TypeError`` will be raised if there are\n any non-string values in *iterable*, including ``bytes`` objects.\n The separator between elements is the string providing this method.\n\nstr.ljust(width[, fillchar])\n\n Return the string left justified in a string of length *width*.\n Padding is done using the specified *fillchar* (default is a\n space). The original string is returned if *width* is less than or\n equal to ``len(s)``.\n\nstr.lower()\n\n Return a copy of the string with all the cased characters [4]\n converted to lowercase.\n\n The lowercasing algorithm used is described in section 3.13 of the\n Unicode Standard.\n\nstr.lstrip([chars])\n\n Return a copy of the string with leading characters removed. The\n *chars* argument is a string specifying the set of characters to be\n removed. If omitted or ``None``, the *chars* argument defaults to\n removing whitespace. 
The *chars* argument is not a prefix; rather,\n all combinations of its values are stripped:\n\n >>> \' spacious \'.lstrip()\n \'spacious \'\n >>> \'www.example.com\'.lstrip(\'cmowz.\')\n \'example.com\'\n\nstatic str.maketrans(x[, y[, z]])\n\n This static method returns a translation table usable for\n ``str.translate()``.\n\n If there is only one argument, it must be a dictionary mapping\n Unicode ordinals (integers) or characters (strings of length 1) to\n Unicode ordinals, strings (of arbitrary lengths) or None.\n Character keys will then be converted to ordinals.\n\n If there are two arguments, they must be strings of equal length,\n and in the resulting dictionary, each character in x will be mapped\n to the character at the same position in y. If there is a third\n argument, it must be a string, whose characters will be mapped to\n None in the result.\n\nstr.partition(sep)\n\n Split the string at the first occurrence of *sep*, and return a\n 3-tuple containing the part before the separator, the separator\n itself, and the part after the separator. If the separator is not\n found, return a 3-tuple containing the string itself, followed by\n two empty strings.\n\nstr.replace(old, new[, count])\n\n Return a copy of the string with all occurrences of substring *old*\n replaced by *new*. If the optional argument *count* is given, only\n the first *count* occurrences are replaced.\n\nstr.rfind(sub[, start[, end]])\n\n Return the highest index in the string where substring *sub* is\n found, such that *sub* is contained within ``s[start:end]``.\n Optional arguments *start* and *end* are interpreted as in slice\n notation. Return ``-1`` on failure.\n\nstr.rindex(sub[, start[, end]])\n\n Like ``rfind()`` but raises ``ValueError`` when the substring *sub*\n is not found.\n\nstr.rjust(width[, fillchar])\n\n Return the string right justified in a string of length *width*.\n Padding is done using the specified *fillchar* (default is a\n space). 
The original string is returned if *width* is less than or\n equal to ``len(s)``.\n\nstr.rpartition(sep)\n\n Split the string at the last occurrence of *sep*, and return a\n 3-tuple containing the part before the separator, the separator\n itself, and the part after the separator. If the separator is not\n found, return a 3-tuple containing two empty strings, followed by\n the string itself.\n\nstr.rsplit(sep=None, maxsplit=-1)\n\n Return a list of the words in the string, using *sep* as the\n delimiter string. If *maxsplit* is given, at most *maxsplit* splits\n are done, the *rightmost* ones. If *sep* is not specified or\n ``None``, any whitespace string is a separator. Except for\n splitting from the right, ``rsplit()`` behaves like ``split()``\n which is described in detail below.\n\nstr.rstrip([chars])\n\n Return a copy of the string with trailing characters removed. The\n *chars* argument is a string specifying the set of characters to be\n removed. If omitted or ``None``, the *chars* argument defaults to\n removing whitespace. The *chars* argument is not a suffix; rather,\n all combinations of its values are stripped:\n\n >>> \' spacious \'.rstrip()\n \' spacious\'\n >>> \'mississippi\'.rstrip(\'ipz\')\n \'mississ\'\n\nstr.split(sep=None, maxsplit=-1)\n\n Return a list of the words in the string, using *sep* as the\n delimiter string. If *maxsplit* is given, at most *maxsplit*\n splits are done (thus, the list will have at most ``maxsplit+1``\n elements). If *maxsplit* is not specified or ``-1``, then there is\n no limit on the number of splits (all possible splits are made).\n\n If *sep* is given, consecutive delimiters are not grouped together\n and are deemed to delimit empty strings (for example,\n ``\'1,,2\'.split(\',\')`` returns ``[\'1\', \'\', \'2\']``). The *sep*\n argument may consist of multiple characters (for example,\n ``\'1<>2<>3\'.split(\'<>\')`` returns ``[\'1\', \'2\', \'3\']``). 
Splitting\n an empty string with a specified separator returns ``[\'\']``.\n\n If *sep* is not specified or is ``None``, a different splitting\n algorithm is applied: runs of consecutive whitespace are regarded\n as a single separator, and the result will contain no empty strings\n at the start or end if the string has leading or trailing\n whitespace. Consequently, splitting an empty string or a string\n consisting of just whitespace with a ``None`` separator returns\n ``[]``.\n\n For example, ``\' 1 2 3 \'.split()`` returns ``[\'1\', \'2\', \'3\']``,\n and ``\' 1 2 3 \'.split(None, 1)`` returns ``[\'1\', \'2 3 \']``.\n\nstr.splitlines([keepends])\n\n Return a list of the lines in the string, breaking at line\n boundaries. This method uses the *universal newlines* approach to\n splitting lines. Line breaks are not included in the resulting list\n unless *keepends* is given and true.\n\n For example, ``\'ab c\\n\\nde fg\\rkl\\r\\n\'.splitlines()`` returns\n ``[\'ab c\', \'\', \'de fg\', \'kl\']``, while the same call with\n ``splitlines(True)`` returns ``[\'ab c\\n\', \'\\n\', \'de fg\\r\',\n \'kl\\r\\n\']``.\n\n Unlike ``split()`` when a delimiter string *sep* is given, this\n method returns an empty list for the empty string, and a terminal\n line break does not result in an extra line.\n\nstr.startswith(prefix[, start[, end]])\n\n Return ``True`` if string starts with the *prefix*, otherwise\n return ``False``. *prefix* can also be a tuple of prefixes to look\n for. With optional *start*, test string beginning at that\n position. With optional *end*, stop comparing string at that\n position.\n\nstr.strip([chars])\n\n Return a copy of the string with the leading and trailing\n characters removed. The *chars* argument is a string specifying the\n set of characters to be removed. If omitted or ``None``, the\n *chars* argument defaults to removing whitespace. 
The *chars*\n argument is not a prefix or suffix; rather, all combinations of its\n values are stripped:\n\n >>> \' spacious \'.strip()\n \'spacious\'\n >>> \'www.example.com\'.strip(\'cmowz.\')\n \'example\'\n\nstr.swapcase()\n\n Return a copy of the string with uppercase characters converted to\n lowercase and vice versa. Note that it is not necessarily true that\n ``s.swapcase().swapcase() == s``.\n\nstr.title()\n\n Return a titlecased version of the string where words start with an\n uppercase character and the remaining characters are lowercase.\n\n The algorithm uses a simple language-independent definition of a\n word as groups of consecutive letters. The definition works in\n many contexts but it means that apostrophes in contractions and\n possessives form word boundaries, which may not be the desired\n result:\n\n >>> "they\'re bill\'s friends from the UK".title()\n "They\'Re Bill\'S Friends From The Uk"\n\n A workaround for apostrophes can be constructed using regular\n expressions:\n\n >>> import re\n >>> def titlecase(s):\n return re.sub(r"[A-Za-z]+(\'[A-Za-z]+)?",\n lambda mo: mo.group(0)[0].upper() +\n mo.group(0)[1:].lower(),\n s)\n\n >>> titlecase("they\'re bill\'s friends.")\n "They\'re Bill\'s Friends."\n\nstr.translate(map)\n\n Return a copy of the *s* where all characters have been mapped\n through the *map* which must be a dictionary of Unicode ordinals\n (integers) to Unicode ordinals, strings or ``None``. Unmapped\n characters are left untouched. Characters mapped to ``None`` are\n deleted.\n\n You can use ``str.maketrans()`` to create a translation map from\n character-to-character mappings in different formats.\n\n Note: An even more flexible approach is to create a custom character\n mapping codec using the ``codecs`` module (see\n ``encodings.cp1251`` for an example).\n\nstr.upper()\n\n Return a copy of the string with all the cased characters [4]\n converted to uppercase. 
Note that ``str.upper().isupper()`` might\n be ``False`` if ``s`` contains uncased characters or if the Unicode\n category of the resulting character(s) is not "Lu" (Letter,\n uppercase), but e.g. "Lt" (Letter, titlecase).\n\n The uppercasing algorithm used is described in section 3.13 of the\n Unicode Standard.\n\nstr.zfill(width)\n\n Return the numeric string left filled with zeros in a string of\n length *width*. A sign prefix is handled correctly. The original\n string is returned if *width* is less than or equal to ``len(s)``.\n',
+ 'strings': '\nString and Bytes literals\n*************************\n\nString literals are described by the following lexical definitions:\n\n stringliteral ::= [stringprefix](shortstring | longstring)\n stringprefix ::= "r" | "u" | "R" | "U"\n shortstring ::= "\'" shortstringitem* "\'" | \'"\' shortstringitem* \'"\'\n longstring ::= "\'\'\'" longstringitem* "\'\'\'" | \'"""\' longstringitem* \'"""\'\n shortstringitem ::= shortstringchar | stringescapeseq\n longstringitem ::= longstringchar | stringescapeseq\n shortstringchar ::= <any source character except "\\" or newline or the quote>\n longstringchar ::= <any source character except "\\">\n stringescapeseq ::= "\\" <any source character>\n\n bytesliteral ::= bytesprefix(shortbytes | longbytes)\n bytesprefix ::= "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB"\n shortbytes ::= "\'" shortbytesitem* "\'" | \'"\' shortbytesitem* \'"\'\n longbytes ::= "\'\'\'" longbytesitem* "\'\'\'" | \'"""\' longbytesitem* \'"""\'\n shortbytesitem ::= shortbyteschar | bytesescapeseq\n longbytesitem ::= longbyteschar | bytesescapeseq\n shortbyteschar ::= <any ASCII character except "\\" or newline or the quote>\n longbyteschar ::= <any ASCII character except "\\">\n bytesescapeseq ::= "\\" <any ASCII character>\n\nOne syntactic restriction not indicated by these productions is that\nwhitespace is not allowed between the ``stringprefix`` or\n``bytesprefix`` and the rest of the literal. The source character set\nis defined by the encoding declaration; it is UTF-8 if no encoding\ndeclaration is given in the source file; see section *Encoding\ndeclarations*.\n\nIn plain English: Both types of literals can be enclosed in matching\nsingle quotes (``\'``) or double quotes (``"``). They can also be\nenclosed in matching groups of three single or double quotes (these\nare generally referred to as *triple-quoted strings*). 
The backslash\n(``\\``) character is used to escape characters that otherwise have a\nspecial meaning, such as newline, backslash itself, or the quote\ncharacter.\n\nBytes literals are always prefixed with ``\'b\'`` or ``\'B\'``; they\nproduce an instance of the ``bytes`` type instead of the ``str`` type.\nThey may only contain ASCII characters; bytes with a numeric value of\n128 or greater must be expressed with escapes.\n\nAs of Python 3.3 it is possible again to prefix unicode strings with a\n``u`` prefix to simplify maintenance of dual 2.x and 3.x codebases.\n\nBoth string and bytes literals may optionally be prefixed with a\nletter ``\'r\'`` or ``\'R\'``; such strings are called *raw strings* and\ntreat backslashes as literal characters. As a result, in string\nliterals, ``\'\\U\'`` and ``\'\\u\'`` escapes in raw strings are not treated\nspecially. Given that Python 2.x\'s raw unicode literals behave\ndifferently than Python 3.x\'s the ``\'ur\'`` syntax is not supported.\n\n New in version 3.3: The ``\'rb\'`` prefix of raw bytes literals has\n been added as a synonym of ``\'br\'``.\n\n New in version 3.3: Support for the unicode legacy literal\n (``u\'value\'``) was reintroduced to simplify the maintenance of dual\n Python 2.x and 3.x codebases. See **PEP 414** for more information.\n\nIn triple-quoted strings, unescaped newlines and quotes are allowed\n(and are retained), except that three unescaped quotes in a row\nterminate the string. (A "quote" is the character used to open the\nstring, i.e. either ``\'`` or ``"``.)\n\nUnless an ``\'r\'`` or ``\'R\'`` prefix is present, escape sequences in\nstrings are interpreted according to rules similar to those used by\nStandard C. 
The recognized escape sequences are:\n\n+-------------------+-----------------------------------+---------+\n| Escape Sequence | Meaning | Notes |\n+===================+===================================+=========+\n| ``\\newline`` | Backslash and newline ignored | |\n+-------------------+-----------------------------------+---------+\n| ``\\\\`` | Backslash (``\\``) | |\n+-------------------+-----------------------------------+---------+\n| ``\\\'`` | Single quote (``\'``) | |\n+-------------------+-----------------------------------+---------+\n| ``\\"`` | Double quote (``"``) | |\n+-------------------+-----------------------------------+---------+\n| ``\\a`` | ASCII Bell (BEL) | |\n+-------------------+-----------------------------------+---------+\n| ``\\b`` | ASCII Backspace (BS) | |\n+-------------------+-----------------------------------+---------+\n| ``\\f`` | ASCII Formfeed (FF) | |\n+-------------------+-----------------------------------+---------+\n| ``\\n`` | ASCII Linefeed (LF) | |\n+-------------------+-----------------------------------+---------+\n| ``\\r`` | ASCII Carriage Return (CR) | |\n+-------------------+-----------------------------------+---------+\n| ``\\t`` | ASCII Horizontal Tab (TAB) | |\n+-------------------+-----------------------------------+---------+\n| ``\\v`` | ASCII Vertical Tab (VT) | |\n+-------------------+-----------------------------------+---------+\n| ``\\ooo`` | Character with octal value *ooo* | (1,3) |\n+-------------------+-----------------------------------+---------+\n| ``\\xhh`` | Character with hex value *hh* | (2,3) |\n+-------------------+-----------------------------------+---------+\n\nEscape sequences only recognized in string literals are:\n\n+-------------------+-----------------------------------+---------+\n| Escape Sequence | Meaning | Notes |\n+===================+===================================+=========+\n| ``\\N{name}`` | Character named *name* in the | (4) |\n| | Unicode database | 
|\n+-------------------+-----------------------------------+---------+\n| ``\\uxxxx`` | Character with 16-bit hex value | (5) |\n| | *xxxx* | |\n+-------------------+-----------------------------------+---------+\n| ``\\Uxxxxxxxx`` | Character with 32-bit hex value | (6) |\n| | *xxxxxxxx* | |\n+-------------------+-----------------------------------+---------+\n\nNotes:\n\n1. As in Standard C, up to three octal digits are accepted.\n\n2. Unlike in Standard C, exactly two hex digits are required.\n\n3. In a bytes literal, hexadecimal and octal escapes denote the byte\n with the given value. In a string literal, these escapes denote a\n Unicode character with the given value.\n\n4. Changed in version 3.3: Support for name aliases [1] has been\n added.\n\n5. Individual code units which form parts of a surrogate pair can be\n encoded using this escape sequence. Exactly four hex digits are\n required.\n\n6. Any Unicode character can be encoded this way, but characters\n outside the Basic Multilingual Plane (BMP) will be encoded using a\n surrogate pair if Python is compiled to use 16-bit code units (the\n default). Exactly eight hex digits are required.\n\nUnlike Standard C, all unrecognized escape sequences are left in the\nstring unchanged, i.e., *the backslash is left in the string*. (This\nbehavior is useful when debugging: if an escape sequence is mistyped,\nthe resulting output is more easily recognized as broken.) It is also\nimportant to note that the escape sequences only recognized in string\nliterals fall into the category of unrecognized escapes for bytes\nliterals.\n\nEven in a raw string, string quotes can be escaped with a backslash,\nbut the backslash remains in the string; for example, ``r"\\""`` is a\nvalid string literal consisting of two characters: a backslash and a\ndouble quote; ``r"\\"`` is not a valid string literal (even a raw\nstring cannot end in an odd number of backslashes). 
Specifically, *a\nraw string cannot end in a single backslash* (since the backslash\nwould escape the following quote character). Note also that a single\nbackslash followed by a newline is interpreted as those two characters\nas part of the string, *not* as a line continuation.\n',
'subscriptions': '\nSubscriptions\n*************\n\nA subscription selects an item of a sequence (string, tuple or list)\nor mapping (dictionary) object:\n\n subscription ::= primary "[" expression_list "]"\n\nThe primary must evaluate to an object that supports subscription,\ne.g. a list or dictionary. User-defined objects can support\nsubscription by defining a ``__getitem__()`` method.\n\nFor built-in objects, there are two types of objects that support\nsubscription:\n\nIf the primary is a mapping, the expression list must evaluate to an\nobject whose value is one of the keys of the mapping, and the\nsubscription selects the value in the mapping that corresponds to that\nkey. (The expression list is a tuple except if it has exactly one\nitem.)\n\nIf the primary is a sequence, the expression (list) must evaluate to\nan integer or a slice (as discussed in the following section).\n\nThe formal syntax makes no special provision for negative indices in\nsequences; however, built-in sequences all provide a ``__getitem__()``\nmethod that interprets negative indices by adding the length of the\nsequence to the index (so that ``x[-1]`` selects the last item of\n``x``). The resulting value must be a nonnegative integer less than\nthe number of items in the sequence, and the subscription selects the\nitem whose index is that value (counting from zero). Since the support\nfor negative indices and slicing occurs in the object\'s\n``__getitem__()`` method, subclasses overriding this method will need\nto explicitly add that support.\n\nA string\'s items are characters. A character is not a separate data\ntype but a string of exactly one character.\n',
'truth': "\nTruth Value Testing\n*******************\n\nAny object can be tested for truth value, for use in an ``if`` or\n``while`` condition or as operand of the Boolean operations below. The\nfollowing values are considered false:\n\n* ``None``\n\n* ``False``\n\n* zero of any numeric type, for example, ``0``, ``0.0``, ``0j``.\n\n* any empty sequence, for example, ``''``, ``()``, ``[]``.\n\n* any empty mapping, for example, ``{}``.\n\n* instances of user-defined classes, if the class defines a\n ``__bool__()`` or ``__len__()`` method, when that method returns the\n integer zero or ``bool`` value ``False``. [1]\n\nAll other values are considered true --- so objects of many types are\nalways true.\n\nOperations and built-in functions that have a Boolean result always\nreturn ``0`` or ``False`` for false and ``1`` or ``True`` for true,\nunless otherwise stated. (Important exception: the Boolean operations\n``or`` and ``and`` always return one of their operands.)\n",
- 'try': '\nThe ``try`` statement\n*********************\n\nThe ``try`` statement specifies exception handlers and/or cleanup code\nfor a group of statements:\n\n try_stmt ::= try1_stmt | try2_stmt\n try1_stmt ::= "try" ":" suite\n ("except" [expression ["as" target]] ":" suite)+\n ["else" ":" suite]\n ["finally" ":" suite]\n try2_stmt ::= "try" ":" suite\n "finally" ":" suite\n\nThe ``except`` clause(s) specify one or more exception handlers. When\nno exception occurs in the ``try`` clause, no exception handler is\nexecuted. When an exception occurs in the ``try`` suite, a search for\nan exception handler is started. This search inspects the except\nclauses in turn until one is found that matches the exception. An\nexpression-less except clause, if present, must be last; it matches\nany exception. For an except clause with an expression, that\nexpression is evaluated, and the clause matches the exception if the\nresulting object is "compatible" with the exception. An object is\ncompatible with an exception if it is the class or a base class of the\nexception object or a tuple containing an item compatible with the\nexception.\n\nIf no except clause matches the exception, the search for an exception\nhandler continues in the surrounding code and on the invocation stack.\n[1]\n\nIf the evaluation of an expression in the header of an except clause\nraises an exception, the original search for a handler is canceled and\na search starts for the new exception in the surrounding code and on\nthe call stack (it is treated as if the entire ``try`` statement\nraised the exception).\n\nWhen a matching except clause is found, the exception is assigned to\nthe target specified after the ``as`` keyword in that except clause,\nif present, and the except clause\'s suite is executed. All except\nclauses must have an executable block. 
When the end of this block is\nreached, execution continues normally after the entire try statement.\n(This means that if two nested handlers exist for the same exception,\nand the exception occurs in the try clause of the inner handler, the\nouter handler will not handle the exception.)\n\nWhen an exception has been assigned using ``as target``, it is cleared\nat the end of the except clause. This is as if\n\n except E as N:\n foo\n\nwas translated to\n\n except E as N:\n try:\n foo\n finally:\n del N\n\nThis means the exception must be assigned to a different name to be\nable to refer to it after the except clause. Exceptions are cleared\nbecause with the traceback attached to them, they form a reference\ncycle with the stack frame, keeping all locals in that frame alive\nuntil the next garbage collection occurs.\n\nBefore an except clause\'s suite is executed, details about the\nexception are stored in the ``sys`` module and can be access via\n``sys.exc_info()``. ``sys.exc_info()`` returns a 3-tuple consisting of\nthe exception class, the exception instance and a traceback object\n(see section *The standard type hierarchy*) identifying the point in\nthe program where the exception occurred. ``sys.exc_info()`` values\nare restored to their previous values (before the call) when returning\nfrom a function that handled an exception.\n\nThe optional ``else`` clause is executed if and when control flows off\nthe end of the ``try`` clause. [2] Exceptions in the ``else`` clause\nare not handled by the preceding ``except`` clauses.\n\nIf ``finally`` is present, it specifies a \'cleanup\' handler. The\n``try`` clause is executed, including any ``except`` and ``else``\nclauses. If an exception occurs in any of the clauses and is not\nhandled, the exception is temporarily saved. The ``finally`` clause is\nexecuted. If there is a saved exception, it is re-raised at the end\nof the ``finally`` clause. 
If the ``finally`` clause raises another\nexception or executes a ``return`` or ``break`` statement, the saved\nexception is set as the context of the new exception. The exception\ninformation is not available to the program during execution of the\n``finally`` clause.\n\nWhen a ``return``, ``break`` or ``continue`` statement is executed in\nthe ``try`` suite of a ``try``...``finally`` statement, the\n``finally`` clause is also executed \'on the way out.\' A ``continue``\nstatement is illegal in the ``finally`` clause. (The reason is a\nproblem with the current implementation --- this restriction may be\nlifted in the future).\n\nAdditional information on exceptions can be found in section\n*Exceptions*, and information on using the ``raise`` statement to\ngenerate exceptions may be found in section *The raise statement*.\n',
- 'types': '\nThe standard type hierarchy\n***************************\n\nBelow is a list of the types that are built into Python. Extension\nmodules (written in C, Java, or other languages, depending on the\nimplementation) can define additional types. Future versions of\nPython may add types to the type hierarchy (e.g., rational numbers,\nefficiently stored arrays of integers, etc.), although such additions\nwill often be provided via the standard library instead.\n\nSome of the type descriptions below contain a paragraph listing\n\'special attributes.\' These are attributes that provide access to the\nimplementation and are not intended for general use. Their definition\nmay change in the future.\n\nNone\n This type has a single value. There is a single object with this\n value. This object is accessed through the built-in name ``None``.\n It is used to signify the absence of a value in many situations,\n e.g., it is returned from functions that don\'t explicitly return\n anything. Its truth value is false.\n\nNotImplemented\n This type has a single value. There is a single object with this\n value. This object is accessed through the built-in name\n ``NotImplemented``. Numeric methods and rich comparison methods may\n return this value if they do not implement the operation for the\n operands provided. (The interpreter will then try the reflected\n operation, or some other fallback, depending on the operator.) Its\n truth value is true.\n\nEllipsis\n This type has a single value. There is a single object with this\n value. This object is accessed through the literal ``...`` or the\n built-in name ``Ellipsis``. Its truth value is true.\n\n``numbers.Number``\n These are created by numeric literals and returned as results by\n arithmetic operators and arithmetic built-in functions. 
Numeric\n objects are immutable; once created their value never changes.\n Python numbers are of course strongly related to mathematical\n numbers, but subject to the limitations of numerical representation\n in computers.\n\n Python distinguishes between integers, floating point numbers, and\n complex numbers:\n\n ``numbers.Integral``\n These represent elements from the mathematical set of integers\n (positive and negative).\n\n There are two types of integers:\n\n Integers (``int``)\n\n These represent numbers in an unlimited range, subject to\n available (virtual) memory only. For the purpose of shift\n and mask operations, a binary representation is assumed, and\n negative numbers are represented in a variant of 2\'s\n complement which gives the illusion of an infinite string of\n sign bits extending to the left.\n\n Booleans (``bool``)\n These represent the truth values False and True. The two\n objects representing the values False and True are the only\n Boolean objects. The Boolean type is a subtype of the integer\n type, and Boolean values behave like the values 0 and 1,\n respectively, in almost all contexts, the exception being\n that when converted to a string, the strings ``"False"`` or\n ``"True"`` are returned, respectively.\n\n The rules for integer representation are intended to give the\n most meaningful interpretation of shift and mask operations\n involving negative integers.\n\n ``numbers.Real`` (``float``)\n These represent machine-level double precision floating point\n numbers. You are at the mercy of the underlying machine\n architecture (and C or Java implementation) for the accepted\n range and handling of overflow. 
Python does not support single-\n precision floating point numbers; the savings in processor and\n memory usage that are usually the reason for using these is\n dwarfed by the overhead of using objects in Python, so there is\n no reason to complicate the language with two kinds of floating\n point numbers.\n\n ``numbers.Complex`` (``complex``)\n These represent complex numbers as a pair of machine-level\n double precision floating point numbers. The same caveats apply\n as for floating point numbers. The real and imaginary parts of a\n complex number ``z`` can be retrieved through the read-only\n attributes ``z.real`` and ``z.imag``.\n\nSequences\n These represent finite ordered sets indexed by non-negative\n numbers. The built-in function ``len()`` returns the number of\n items of a sequence. When the length of a sequence is *n*, the\n index set contains the numbers 0, 1, ..., *n*-1. Item *i* of\n sequence *a* is selected by ``a[i]``.\n\n Sequences also support slicing: ``a[i:j]`` selects all items with\n index *k* such that *i* ``<=`` *k* ``<`` *j*. When used as an\n expression, a slice is a sequence of the same type. This implies\n that the index set is renumbered so that it starts at 0.\n\n Some sequences also support "extended slicing" with a third "step"\n parameter: ``a[i:j:k]`` selects all items of *a* with index *x*\n where ``x = i + n*k``, *n* ``>=`` ``0`` and *i* ``<=`` *x* ``<``\n *j*.\n\n Sequences are distinguished according to their mutability:\n\n Immutable sequences\n An object of an immutable sequence type cannot change once it is\n created. (If the object contains references to other objects,\n these other objects may be mutable and may be changed; however,\n the collection of objects directly referenced by an immutable\n object cannot change.)\n\n The following types are immutable sequences:\n\n Strings\n The items of a string object are Unicode code units. 
A\n Unicode code unit is represented by a string object of one\n item and can hold either a 16-bit or 32-bit value\n representing a Unicode ordinal (the maximum value for the\n ordinal is given in ``sys.maxunicode``, and depends on how\n Python is configured at compile time). Surrogate pairs may\n be present in the Unicode object, and will be reported as two\n separate items. The built-in functions ``chr()`` and\n ``ord()`` convert between code units and nonnegative integers\n representing the Unicode ordinals as defined in the Unicode\n Standard 3.0. Conversion from and to other encodings are\n possible through the string method ``encode()``.\n\n Tuples\n The items of a tuple are arbitrary Python objects. Tuples of\n two or more items are formed by comma-separated lists of\n expressions. A tuple of one item (a \'singleton\') can be\n formed by affixing a comma to an expression (an expression by\n itself does not create a tuple, since parentheses must be\n usable for grouping of expressions). An empty tuple can be\n formed by an empty pair of parentheses.\n\n Bytes\n A bytes object is an immutable array. The items are 8-bit\n bytes, represented by integers in the range 0 <= x < 256.\n Bytes literals (like ``b\'abc\'`` and the built-in function\n ``bytes()`` can be used to construct bytes objects. Also,\n bytes objects can be decoded to strings via the ``decode()``\n method.\n\n Mutable sequences\n Mutable sequences can be changed after they are created. The\n subscription and slicing notations can be used as the target of\n assignment and ``del`` (delete) statements.\n\n There are currently two intrinsic mutable sequence types:\n\n Lists\n The items of a list are arbitrary Python objects. Lists are\n formed by placing a comma-separated list of expressions in\n square brackets. (Note that there are no special cases needed\n to form lists of length 0 or 1.)\n\n Byte Arrays\n A bytearray object is a mutable array. 
They are created by\n the built-in ``bytearray()`` constructor. Aside from being\n mutable (and hence unhashable), byte arrays otherwise provide\n the same interface and functionality as immutable bytes\n objects.\n\n The extension module ``array`` provides an additional example of\n a mutable sequence type, as does the ``collections`` module.\n\nSet types\n These represent unordered, finite sets of unique, immutable\n objects. As such, they cannot be indexed by any subscript. However,\n they can be iterated over, and the built-in function ``len()``\n returns the number of items in a set. Common uses for sets are fast\n membership testing, removing duplicates from a sequence, and\n computing mathematical operations such as intersection, union,\n difference, and symmetric difference.\n\n For set elements, the same immutability rules apply as for\n dictionary keys. Note that numeric types obey the normal rules for\n numeric comparison: if two numbers compare equal (e.g., ``1`` and\n ``1.0``), only one of them can be contained in a set.\n\n There are currently two intrinsic set types:\n\n Sets\n These represent a mutable set. They are created by the built-in\n ``set()`` constructor and can be modified afterwards by several\n methods, such as ``add()``.\n\n Frozen sets\n These represent an immutable set. They are created by the\n built-in ``frozenset()`` constructor. As a frozenset is\n immutable and *hashable*, it can be used again as an element of\n another set, or as a dictionary key.\n\nMappings\n These represent finite sets of objects indexed by arbitrary index\n sets. The subscript notation ``a[k]`` selects the item indexed by\n ``k`` from the mapping ``a``; this can be used in expressions and\n as the target of assignments or ``del`` statements. 
The built-in\n function ``len()`` returns the number of items in a mapping.\n\n There is currently a single intrinsic mapping type:\n\n Dictionaries\n These represent finite sets of objects indexed by nearly\n arbitrary values. The only types of values not acceptable as\n keys are values containing lists or dictionaries or other\n mutable types that are compared by value rather than by object\n identity, the reason being that the efficient implementation of\n dictionaries requires a key\'s hash value to remain constant.\n Numeric types used for keys obey the normal rules for numeric\n comparison: if two numbers compare equal (e.g., ``1`` and\n ``1.0``) then they can be used interchangeably to index the same\n dictionary entry.\n\n Dictionaries are mutable; they can be created by the ``{...}``\n notation (see section *Dictionary displays*).\n\n The extension modules ``dbm.ndbm`` and ``dbm.gnu`` provide\n additional examples of mapping types, as does the\n ``collections`` module.\n\nCallable types\n These are the types to which the function call operation (see\n section *Calls*) can be applied:\n\n User-defined functions\n A user-defined function object is created by a function\n definition (see section *Function definitions*). 
It should be\n called with an argument list containing the same number of items\n as the function\'s formal parameter list.\n\n Special attributes:\n\n +---------------------------+---------------------------------+-------------+\n | Attribute | Meaning | |\n +===========================+=================================+=============+\n | ``__doc__`` | The function\'s documentation | Writable |\n | | string, or ``None`` if | |\n | | unavailable | |\n +---------------------------+---------------------------------+-------------+\n | ``__name__`` | The function\'s name | Writable |\n +---------------------------+---------------------------------+-------------+\n | ``__module__`` | The name of the module the | Writable |\n | | function was defined in, or | |\n | | ``None`` if unavailable. | |\n +---------------------------+---------------------------------+-------------+\n | ``__defaults__`` | A tuple containing default | Writable |\n | | argument values for those | |\n | | arguments that have defaults, | |\n | | or ``None`` if no arguments | |\n | | have a default value | |\n +---------------------------+---------------------------------+-------------+\n | ``__code__`` | The code object representing | Writable |\n | | the compiled function body. | |\n +---------------------------+---------------------------------+-------------+\n | ``__globals__`` | A reference to the dictionary | Read-only |\n | | that holds the function\'s | |\n | | global variables --- the global | |\n | | namespace of the module in | |\n | | which the function was defined. | |\n +---------------------------+---------------------------------+-------------+\n | ``__dict__`` | The namespace supporting | Writable |\n | | arbitrary function attributes. | |\n +---------------------------+---------------------------------+-------------+\n | ``__closure__`` | ``None`` or a tuple of cells | Read-only |\n | | that contain bindings for the | |\n | | function\'s free variables. 
| |\n +---------------------------+---------------------------------+-------------+\n | ``__annotations__`` | A dict containing annotations | Writable |\n | | of parameters. The keys of the | |\n | | dict are the parameter names, | |\n | | or ``\'return\'`` for the return | |\n | | annotation, if provided. | |\n +---------------------------+---------------------------------+-------------+\n | ``__kwdefaults__`` | A dict containing defaults for | Writable |\n | | keyword-only parameters. | |\n +---------------------------+---------------------------------+-------------+\n\n Most of the attributes labelled "Writable" check the type of the\n assigned value.\n\n Function objects also support getting and setting arbitrary\n attributes, which can be used, for example, to attach metadata\n to functions. Regular attribute dot-notation is used to get and\n set such attributes. *Note that the current implementation only\n supports function attributes on user-defined functions. Function\n attributes on built-in functions may be supported in the\n future.*\n\n Additional information about a function\'s definition can be\n retrieved from its code object; see the description of internal\n types below.\n\n Instance methods\n An instance method object combines a class, a class instance and\n any callable object (normally a user-defined function).\n\n Special read-only attributes: ``__self__`` is the class instance\n object, ``__func__`` is the function object; ``__doc__`` is the\n method\'s documentation (same as ``__func__.__doc__``);\n ``__name__`` is the method name (same as ``__func__.__name__``);\n ``__module__`` is the name of the module the method was defined\n in, or ``None`` if unavailable.\n\n Methods also support accessing (but not setting) the arbitrary\n function attributes on the underlying function object.\n\n User-defined method objects may be created when getting an\n attribute of a class (perhaps via an instance of that class), if\n that attribute is a 
user-defined function object or a class\n method object.\n\n When an instance method object is created by retrieving a user-\n defined function object from a class via one of its instances,\n its ``__self__`` attribute is the instance, and the method\n object is said to be bound. The new method\'s ``__func__``\n attribute is the original function object.\n\n When a user-defined method object is created by retrieving\n another method object from a class or instance, the behaviour is\n the same as for a function object, except that the ``__func__``\n attribute of the new instance is not the original method object\n but its ``__func__`` attribute.\n\n When an instance method object is created by retrieving a class\n method object from a class or instance, its ``__self__``\n attribute is the class itself, and its ``__func__`` attribute is\n the function object underlying the class method.\n\n When an instance method object is called, the underlying\n function (``__func__``) is called, inserting the class instance\n (``__self__``) in front of the argument list. For instance,\n when ``C`` is a class which contains a definition for a function\n ``f()``, and ``x`` is an instance of ``C``, calling ``x.f(1)``\n is equivalent to calling ``C.f(x, 1)``.\n\n When an instance method object is derived from a class method\n object, the "class instance" stored in ``__self__`` will\n actually be the class itself, so that calling either ``x.f(1)``\n or ``C.f(1)`` is equivalent to calling ``f(C,1)`` where ``f`` is\n the underlying function.\n\n Note that the transformation from function object to instance\n method object happens each time the attribute is retrieved from\n the instance. In some cases, a fruitful optimization is to\n assign the attribute to a local variable and call that local\n variable. Also notice that this transformation only happens for\n user-defined functions; other callable objects (and all non-\n callable objects) are retrieved without transformation. 
It is\n also important to note that user-defined functions which are\n attributes of a class instance are not converted to bound\n methods; this *only* happens when the function is an attribute\n of the class.\n\n Generator functions\n A function or method which uses the ``yield`` statement (see\n section *The yield statement*) is called a *generator function*.\n Such a function, when called, always returns an iterator object\n which can be used to execute the body of the function: calling\n the iterator\'s ``__next__()`` method will cause the function to\n execute until it provides a value using the ``yield`` statement.\n When the function executes a ``return`` statement or falls off\n the end, a ``StopIteration`` exception is raised and the\n iterator will have reached the end of the set of values to be\n returned.\n\n Built-in functions\n A built-in function object is a wrapper around a C function.\n Examples of built-in functions are ``len()`` and ``math.sin()``\n (``math`` is a standard built-in module). The number and type of\n the arguments are determined by the C function. Special read-\n only attributes: ``__doc__`` is the function\'s documentation\n string, or ``None`` if unavailable; ``__name__`` is the\n function\'s name; ``__self__`` is set to ``None`` (but see the\n next item); ``__module__`` is the name of the module the\n function was defined in or ``None`` if unavailable.\n\n Built-in methods\n This is really a different disguise of a built-in function, this\n time containing an object passed to the C function as an\n implicit extra argument. An example of a built-in method is\n ``alist.append()``, assuming *alist* is a list object. In this\n case, the special read-only attribute ``__self__`` is set to the\n object denoted by *alist*.\n\n Classes\n Classes are callable. These objects normally act as factories\n for new instances of themselves, but variations are possible for\n class types that override ``__new__()``. 
The arguments of the\n call are passed to ``__new__()`` and, in the typical case, to\n ``__init__()`` to initialize the new instance.\n\n Class Instances\n Instances of arbitrary classes can be made callable by defining\n a ``__call__()`` method in their class.\n\nModules\n Modules are imported by the ``import`` statement (see section *The\n import statement*). A module object has a namespace implemented by\n a dictionary object (this is the dictionary referenced by the\n __globals__ attribute of functions defined in the module).\n Attribute references are translated to lookups in this dictionary,\n e.g., ``m.x`` is equivalent to ``m.__dict__["x"]``. A module object\n does not contain the code object used to initialize the module\n (since it isn\'t needed once the initialization is done).\n\n Attribute assignment updates the module\'s namespace dictionary,\n e.g., ``m.x = 1`` is equivalent to ``m.__dict__["x"] = 1``.\n\n Special read-only attribute: ``__dict__`` is the module\'s namespace\n as a dictionary object.\n\n **CPython implementation detail:** Because of the way CPython\n clears module dictionaries, the module dictionary will be cleared\n when the module falls out of scope even if the dictionary still has\n live references. To avoid this, copy the dictionary or keep the\n module around while using its dictionary directly.\n\n Predefined (writable) attributes: ``__name__`` is the module\'s\n name; ``__doc__`` is the module\'s documentation string, or ``None``\n if unavailable; ``__file__`` is the pathname of the file from which\n the module was loaded, if it was loaded from a file. The\n ``__file__`` attribute is not present for C modules that are\n statically linked into the interpreter; for extension modules\n loaded dynamically from a shared library, it is the pathname of the\n shared library file.\n\nCustom classes\n Custom class types are typically created by class definitions (see\n section *Class definitions*). 
A class has a namespace implemented\n by a dictionary object. Class attribute references are translated\n to lookups in this dictionary, e.g., ``C.x`` is translated to\n ``C.__dict__["x"]`` (although there are a number of hooks which\n allow for other means of locating attributes). When the attribute\n name is not found there, the attribute search continues in the base\n classes. This search of the base classes uses the C3 method\n resolution order which behaves correctly even in the presence of\n \'diamond\' inheritance structures where there are multiple\n inheritance paths leading back to a common ancestor. Additional\n details on the C3 MRO used by Python can be found in the\n documentation accompanying the 2.3 release at\n http://www.python.org/download/releases/2.3/mro/.\n\n When a class attribute reference (for class ``C``, say) would yield\n a class method object, it is transformed into an instance method\n object whose ``__self__`` attribute is ``C``. When it would yield\n a static method object, it is transformed into the object wrapped\n by the static method object. 
See section *Implementing Descriptors*\n for another way in which attributes retrieved from a class may\n differ from those actually contained in its ``__dict__``.\n\n Class attribute assignments update the class\'s dictionary, never\n the dictionary of a base class.\n\n A class object can be called (see above) to yield a class instance\n (see below).\n\n Special attributes: ``__name__`` is the class name; ``__module__``\n is the module name in which the class was defined; ``__dict__`` is\n the dictionary containing the class\'s namespace; ``__bases__`` is a\n tuple (possibly empty or a singleton) containing the base classes,\n in the order of their occurrence in the base class list;\n ``__doc__`` is the class\'s documentation string, or None if\n undefined.\n\nClass instances\n A class instance is created by calling a class object (see above).\n A class instance has a namespace implemented as a dictionary which\n is the first place in which attribute references are searched.\n When an attribute is not found there, and the instance\'s class has\n an attribute by that name, the search continues with the class\n attributes. If a class attribute is found that is a user-defined\n function object, it is transformed into an instance method object\n whose ``__self__`` attribute is the instance. Static method and\n class method objects are also transformed; see above under\n "Classes". See section *Implementing Descriptors* for another way\n in which attributes of a class retrieved via its instances may\n differ from the objects actually stored in the class\'s\n ``__dict__``. If no class attribute is found, and the object\'s\n class has a ``__getattr__()`` method, that is called to satisfy the\n lookup.\n\n Attribute assignments and deletions update the instance\'s\n dictionary, never a class\'s dictionary. 
If the class has a\n ``__setattr__()`` or ``__delattr__()`` method, this is called\n instead of updating the instance dictionary directly.\n\n Class instances can pretend to be numbers, sequences, or mappings\n if they have methods with certain special names. See section\n *Special method names*.\n\n Special attributes: ``__dict__`` is the attribute dictionary;\n ``__class__`` is the instance\'s class.\n\nI/O objects (also known as file objects)\n A *file object* represents an open file. Various shortcuts are\n available to create file objects: the ``open()`` built-in function,\n and also ``os.popen()``, ``os.fdopen()``, and the ``makefile()``\n method of socket objects (and perhaps by other functions or methods\n provided by extension modules).\n\n The objects ``sys.stdin``, ``sys.stdout`` and ``sys.stderr`` are\n initialized to file objects corresponding to the interpreter\'s\n standard input, output and error streams; they are all open in text\n mode and therefore follow the interface defined by the\n ``io.TextIOBase`` abstract class.\n\nInternal types\n A few types used internally by the interpreter are exposed to the\n user. Their definitions may change with future versions of the\n interpreter, but they are mentioned here for completeness.\n\n Code objects\n Code objects represent *byte-compiled* executable Python code,\n or *bytecode*. The difference between a code object and a\n function object is that the function object contains an explicit\n reference to the function\'s globals (the module in which it was\n defined), while a code object contains no context; also the\n default argument values are stored in the function object, not\n in the code object (because they represent values calculated at\n run-time). 
Unlike function objects, code objects are immutable\n and contain no references (directly or indirectly) to mutable\n objects.\n\n Special read-only attributes: ``co_name`` gives the function\n name; ``co_argcount`` is the number of positional arguments\n (including arguments with default values); ``co_nlocals`` is the\n number of local variables used by the function (including\n arguments); ``co_varnames`` is a tuple containing the names of\n the local variables (starting with the argument names);\n ``co_cellvars`` is a tuple containing the names of local\n variables that are referenced by nested functions;\n ``co_freevars`` is a tuple containing the names of free\n variables; ``co_code`` is a string representing the sequence of\n bytecode instructions; ``co_consts`` is a tuple containing the\n literals used by the bytecode; ``co_names`` is a tuple\n containing the names used by the bytecode; ``co_filename`` is\n the filename from which the code was compiled;\n ``co_firstlineno`` is the first line number of the function;\n ``co_lnotab`` is a string encoding the mapping from bytecode\n offsets to line numbers (for details see the source code of the\n interpreter); ``co_stacksize`` is the required stack size\n (including local variables); ``co_flags`` is an integer encoding\n a number of flags for the interpreter.\n\n The following flag bits are defined for ``co_flags``: bit\n ``0x04`` is set if the function uses the ``*arguments`` syntax\n to accept an arbitrary number of positional arguments; bit\n ``0x08`` is set if the function uses the ``**keywords`` syntax\n to accept arbitrary keyword arguments; bit ``0x20`` is set if\n the function is a generator.\n\n Future feature declarations (``from __future__ import\n division``) also use bits in ``co_flags`` to indicate whether a\n code object was compiled with a particular feature enabled: bit\n ``0x2000`` is set if the function was compiled with future\n division enabled; bits ``0x10`` and ``0x1000`` were used in\n 
earlier versions of Python.\n\n Other bits in ``co_flags`` are reserved for internal use.\n\n If a code object represents a function, the first item in\n ``co_consts`` is the documentation string of the function, or\n ``None`` if undefined.\n\n Frame objects\n Frame objects represent execution frames. They may occur in\n traceback objects (see below).\n\n Special read-only attributes: ``f_back`` points to the previous\n stack frame (towards the caller), or ``None`` if this is the\n bottom stack frame; ``f_code`` is the code object being executed\n in this frame; ``f_locals`` is the dictionary used to look up\n local variables; ``f_globals`` is used for global variables;\n ``f_builtins`` is used for built-in (intrinsic) names;\n ``f_lasti`` gives the precise instruction (this is an index into\n the bytecode string of the code object).\n\n Special writable attributes: ``f_trace``, if not ``None``, is a\n function called at the start of each source code line (this is\n used by the debugger); ``f_lineno`` is the current line number\n of the frame --- writing to this from within a trace function\n jumps to the given line (only for the bottom-most frame). A\n debugger can implement a Jump command (aka Set Next Statement)\n by writing to f_lineno.\n\n Traceback objects\n Traceback objects represent a stack trace of an exception. A\n traceback object is created when an exception occurs. When the\n search for an exception handler unwinds the execution stack, at\n each unwound level a traceback object is inserted in front of\n the current traceback. When an exception handler is entered,\n the stack trace is made available to the program. (See section\n *The try statement*.) It is accessible as the third item of the\n tuple returned by ``sys.exc_info()``. 
When the program contains\n no suitable handler, the stack trace is written (nicely\n formatted) to the standard error stream; if the interpreter is\n interactive, it is also made available to the user as\n ``sys.last_traceback``.\n\n Special read-only attributes: ``tb_next`` is the next level in\n the stack trace (towards the frame where the exception\n occurred), or ``None`` if there is no next level; ``tb_frame``\n points to the execution frame of the current level;\n ``tb_lineno`` gives the line number where the exception\n occurred; ``tb_lasti`` indicates the precise instruction. The\n line number and last instruction in the traceback may differ\n from the line number of its frame object if the exception\n occurred in a ``try`` statement with no matching except clause\n or with a finally clause.\n\n Slice objects\n Slice objects are used to represent slices for ``__getitem__()``\n methods. They are also created by the built-in ``slice()``\n function.\n\n Special read-only attributes: ``start`` is the lower bound;\n ``stop`` is the upper bound; ``step`` is the step value; each is\n ``None`` if omitted. These attributes can have any type.\n\n Slice objects support one method:\n\n slice.indices(self, length)\n\n This method takes a single integer argument *length* and\n computes information about the slice that the slice object\n would describe if applied to a sequence of *length* items.\n It returns a tuple of three integers; respectively these are\n the *start* and *stop* indices and the *step* or stride\n length of the slice. Missing or out-of-bounds indices are\n handled in a manner consistent with regular slices.\n\n Static method objects\n Static method objects provide a way of defeating the\n transformation of function objects to method objects described\n above. A static method object is a wrapper around any other\n object, usually a user-defined method object. 
When a static\n method object is retrieved from a class or a class instance, the\n object actually returned is the wrapped object, which is not\n subject to any further transformation. Static method objects are\n not themselves callable, although the objects they wrap usually\n are. Static method objects are created by the built-in\n ``staticmethod()`` constructor.\n\n Class method objects\n A class method object, like a static method object, is a wrapper\n around another object that alters the way in which that object\n is retrieved from classes and class instances. The behaviour of\n class method objects upon such retrieval is described above,\n under "User-defined methods". Class method objects are created\n by the built-in ``classmethod()`` constructor.\n',
+ 'try': '\nThe ``try`` statement\n*********************\n\nThe ``try`` statement specifies exception handlers and/or cleanup code\nfor a group of statements:\n\n try_stmt ::= try1_stmt | try2_stmt\n try1_stmt ::= "try" ":" suite\n ("except" [expression ["as" target]] ":" suite)+\n ["else" ":" suite]\n ["finally" ":" suite]\n try2_stmt ::= "try" ":" suite\n "finally" ":" suite\n\nThe ``except`` clause(s) specify one or more exception handlers. When\nno exception occurs in the ``try`` clause, no exception handler is\nexecuted. When an exception occurs in the ``try`` suite, a search for\nan exception handler is started. This search inspects the except\nclauses in turn until one is found that matches the exception. An\nexpression-less except clause, if present, must be last; it matches\nany exception. For an except clause with an expression, that\nexpression is evaluated, and the clause matches the exception if the\nresulting object is "compatible" with the exception. An object is\ncompatible with an exception if it is the class or a base class of the\nexception object or a tuple containing an item compatible with the\nexception.\n\nIf no except clause matches the exception, the search for an exception\nhandler continues in the surrounding code and on the invocation stack.\n[1]\n\nIf the evaluation of an expression in the header of an except clause\nraises an exception, the original search for a handler is canceled and\na search starts for the new exception in the surrounding code and on\nthe call stack (it is treated as if the entire ``try`` statement\nraised the exception).\n\nWhen a matching except clause is found, the exception is assigned to\nthe target specified after the ``as`` keyword in that except clause,\nif present, and the except clause\'s suite is executed. All except\nclauses must have an executable block. 
When the end of this block is\nreached, execution continues normally after the entire try statement.\n(This means that if two nested handlers exist for the same exception,\nand the exception occurs in the try clause of the inner handler, the\nouter handler will not handle the exception.)\n\nWhen an exception has been assigned using ``as target``, it is cleared\nat the end of the except clause. This is as if\n\n except E as N:\n foo\n\nwas translated to\n\n except E as N:\n try:\n foo\n finally:\n del N\n\nThis means the exception must be assigned to a different name to be\nable to refer to it after the except clause. Exceptions are cleared\nbecause with the traceback attached to them, they form a reference\ncycle with the stack frame, keeping all locals in that frame alive\nuntil the next garbage collection occurs.\n\nBefore an except clause\'s suite is executed, details about the\nexception are stored in the ``sys`` module and can be accessed via\n``sys.exc_info()``. ``sys.exc_info()`` returns a 3-tuple consisting of\nthe exception class, the exception instance and a traceback object\n(see section *The standard type hierarchy*) identifying the point in\nthe program where the exception occurred. ``sys.exc_info()`` values\nare restored to their previous values (before the call) when returning\nfrom a function that handled an exception.\n\nThe optional ``else`` clause is executed if and when control flows off\nthe end of the ``try`` clause. [2] Exceptions in the ``else`` clause\nare not handled by the preceding ``except`` clauses.\n\nIf ``finally`` is present, it specifies a \'cleanup\' handler. The\n``try`` clause is executed, including any ``except`` and ``else``\nclauses. If an exception occurs in any of the clauses and is not\nhandled, the exception is temporarily saved. The ``finally`` clause is\nexecuted. If there is a saved exception or ``break`` statement, it is\nre-raised at the end of the ``finally`` clause. 
If the ``finally``\nclause raises another exception the saved exception is set as the\ncontext of the new exception; if the ``finally`` clause executes a\n``return`` statement, the saved exception is discarded:\n\n def f():\n try:\n 1/0\n finally:\n return 42\n\n >>> f()\n 42\n\nThe exception information is not available to the program during\nexecution of the ``finally`` clause.\n\nWhen a ``return``, ``break`` or ``continue`` statement is executed in\nthe ``try`` suite of a ``try``...``finally`` statement, the\n``finally`` clause is also executed \'on the way out.\' A ``continue``\nstatement is illegal in the ``finally`` clause. (The reason is a\nproblem with the current implementation --- this restriction may be\nlifted in the future).\n\nAdditional information on exceptions can be found in section\n*Exceptions*, and information on using the ``raise`` statement to\ngenerate exceptions may be found in section *The raise statement*.\n',
+ 'types': '\nThe standard type hierarchy\n***************************\n\nBelow is a list of the types that are built into Python. Extension\nmodules (written in C, Java, or other languages, depending on the\nimplementation) can define additional types. Future versions of\nPython may add types to the type hierarchy (e.g., rational numbers,\nefficiently stored arrays of integers, etc.), although such additions\nwill often be provided via the standard library instead.\n\nSome of the type descriptions below contain a paragraph listing\n\'special attributes.\' These are attributes that provide access to the\nimplementation and are not intended for general use. Their definition\nmay change in the future.\n\nNone\n This type has a single value. There is a single object with this\n value. This object is accessed through the built-in name ``None``.\n It is used to signify the absence of a value in many situations,\n e.g., it is returned from functions that don\'t explicitly return\n anything. Its truth value is false.\n\nNotImplemented\n This type has a single value. There is a single object with this\n value. This object is accessed through the built-in name\n ``NotImplemented``. Numeric methods and rich comparison methods may\n return this value if they do not implement the operation for the\n operands provided. (The interpreter will then try the reflected\n operation, or some other fallback, depending on the operator.) Its\n truth value is true.\n\nEllipsis\n This type has a single value. There is a single object with this\n value. This object is accessed through the literal ``...`` or the\n built-in name ``Ellipsis``. Its truth value is true.\n\n``numbers.Number``\n These are created by numeric literals and returned as results by\n arithmetic operators and arithmetic built-in functions. 
Numeric\n objects are immutable; once created their value never changes.\n Python numbers are of course strongly related to mathematical\n numbers, but subject to the limitations of numerical representation\n in computers.\n\n Python distinguishes between integers, floating point numbers, and\n complex numbers:\n\n ``numbers.Integral``\n These represent elements from the mathematical set of integers\n (positive and negative).\n\n There are two types of integers:\n\n Integers (``int``)\n\n These represent numbers in an unlimited range, subject to\n available (virtual) memory only. For the purpose of shift\n and mask operations, a binary representation is assumed, and\n negative numbers are represented in a variant of 2\'s\n complement which gives the illusion of an infinite string of\n sign bits extending to the left.\n\n Booleans (``bool``)\n These represent the truth values False and True. The two\n objects representing the values False and True are the only\n Boolean objects. The Boolean type is a subtype of the integer\n type, and Boolean values behave like the values 0 and 1,\n respectively, in almost all contexts, the exception being\n that when converted to a string, the strings ``"False"`` or\n ``"True"`` are returned, respectively.\n\n The rules for integer representation are intended to give the\n most meaningful interpretation of shift and mask operations\n involving negative integers.\n\n ``numbers.Real`` (``float``)\n These represent machine-level double precision floating point\n numbers. You are at the mercy of the underlying machine\n architecture (and C or Java implementation) for the accepted\n range and handling of overflow. 
Python does not support single-\n precision floating point numbers; the savings in processor and\n memory usage that are usually the reason for using these is\n dwarfed by the overhead of using objects in Python, so there is\n no reason to complicate the language with two kinds of floating\n point numbers.\n\n ``numbers.Complex`` (``complex``)\n These represent complex numbers as a pair of machine-level\n double precision floating point numbers. The same caveats apply\n as for floating point numbers. The real and imaginary parts of a\n complex number ``z`` can be retrieved through the read-only\n attributes ``z.real`` and ``z.imag``.\n\nSequences\n These represent finite ordered sets indexed by non-negative\n numbers. The built-in function ``len()`` returns the number of\n items of a sequence. When the length of a sequence is *n*, the\n index set contains the numbers 0, 1, ..., *n*-1. Item *i* of\n sequence *a* is selected by ``a[i]``.\n\n Sequences also support slicing: ``a[i:j]`` selects all items with\n index *k* such that *i* ``<=`` *k* ``<`` *j*. When used as an\n expression, a slice is a sequence of the same type. This implies\n that the index set is renumbered so that it starts at 0.\n\n Some sequences also support "extended slicing" with a third "step"\n parameter: ``a[i:j:k]`` selects all items of *a* with index *x*\n where ``x = i + n*k``, *n* ``>=`` ``0`` and *i* ``<=`` *x* ``<``\n *j*.\n\n Sequences are distinguished according to their mutability:\n\n Immutable sequences\n An object of an immutable sequence type cannot change once it is\n created. (If the object contains references to other objects,\n these other objects may be mutable and may be changed; however,\n the collection of objects directly referenced by an immutable\n object cannot change.)\n\n The following types are immutable sequences:\n\n Strings\n A string is a sequence of values that represent Unicode\n codepoints. 
All the codepoints in range ``U+0000 - U+10FFFF``\n can be represented in a string. Python doesn\'t have a\n ``char`` type, and every character in the string is\n represented as a string object with length ``1``. The built-\n in function ``ord()`` converts a character to its codepoint\n (as an integer); ``chr()`` converts an integer in range ``0 -\n 10FFFF`` to the corresponding character. ``str.encode()`` can\n be used to convert a ``str`` to ``bytes`` using the given\n encoding, and ``bytes.decode()`` can be used to achieve the\n opposite.\n\n Tuples\n The items of a tuple are arbitrary Python objects. Tuples of\n two or more items are formed by comma-separated lists of\n expressions. A tuple of one item (a \'singleton\') can be\n formed by affixing a comma to an expression (an expression by\n itself does not create a tuple, since parentheses must be\n usable for grouping of expressions). An empty tuple can be\n formed by an empty pair of parentheses.\n\n Bytes\n A bytes object is an immutable array. The items are 8-bit\n bytes, represented by integers in the range 0 <= x < 256.\n Bytes literals (like ``b\'abc\'``) and the built-in function\n ``bytes()`` can be used to construct bytes objects. Also,\n bytes objects can be decoded to strings via the ``decode()``\n method.\n\n Mutable sequences\n Mutable sequences can be changed after they are created. The\n subscription and slicing notations can be used as the target of\n assignment and ``del`` (delete) statements.\n\n There are currently two intrinsic mutable sequence types:\n\n Lists\n The items of a list are arbitrary Python objects. Lists are\n formed by placing a comma-separated list of expressions in\n square brackets. (Note that there are no special cases needed\n to form lists of length 0 or 1.)\n\n Byte Arrays\n A bytearray object is a mutable array. They are created by\n the built-in ``bytearray()`` constructor. 
Aside from being\n mutable (and hence unhashable), byte arrays otherwise provide\n the same interface and functionality as immutable bytes\n objects.\n\n The extension module ``array`` provides an additional example of\n a mutable sequence type, as does the ``collections`` module.\n\nSet types\n These represent unordered, finite sets of unique, immutable\n objects. As such, they cannot be indexed by any subscript. However,\n they can be iterated over, and the built-in function ``len()``\n returns the number of items in a set. Common uses for sets are fast\n membership testing, removing duplicates from a sequence, and\n computing mathematical operations such as intersection, union,\n difference, and symmetric difference.\n\n For set elements, the same immutability rules apply as for\n dictionary keys. Note that numeric types obey the normal rules for\n numeric comparison: if two numbers compare equal (e.g., ``1`` and\n ``1.0``), only one of them can be contained in a set.\n\n There are currently two intrinsic set types:\n\n Sets\n These represent a mutable set. They are created by the built-in\n ``set()`` constructor and can be modified afterwards by several\n methods, such as ``add()``.\n\n Frozen sets\n These represent an immutable set. They are created by the\n built-in ``frozenset()`` constructor. As a frozenset is\n immutable and *hashable*, it can be used again as an element of\n another set, or as a dictionary key.\n\nMappings\n These represent finite sets of objects indexed by arbitrary index\n sets. The subscript notation ``a[k]`` selects the item indexed by\n ``k`` from the mapping ``a``; this can be used in expressions and\n as the target of assignments or ``del`` statements. The built-in\n function ``len()`` returns the number of items in a mapping.\n\n There is currently a single intrinsic mapping type:\n\n Dictionaries\n These represent finite sets of objects indexed by nearly\n arbitrary values. 
The only types of values not acceptable as\n keys are values containing lists or dictionaries or other\n mutable types that are compared by value rather than by object\n identity, the reason being that the efficient implementation of\n dictionaries requires a key\'s hash value to remain constant.\n Numeric types used for keys obey the normal rules for numeric\n comparison: if two numbers compare equal (e.g., ``1`` and\n ``1.0``) then they can be used interchangeably to index the same\n dictionary entry.\n\n Dictionaries are mutable; they can be created by the ``{...}``\n notation (see section *Dictionary displays*).\n\n The extension modules ``dbm.ndbm`` and ``dbm.gnu`` provide\n additional examples of mapping types, as does the\n ``collections`` module.\n\nCallable types\n These are the types to which the function call operation (see\n section *Calls*) can be applied:\n\n User-defined functions\n A user-defined function object is created by a function\n definition (see section *Function definitions*). It should be\n called with an argument list containing the same number of items\n as the function\'s formal parameter list.\n\n Special attributes:\n\n +---------------------------+---------------------------------+-------------+\n | Attribute | Meaning | |\n +===========================+=================================+=============+\n | ``__doc__`` | The function\'s documentation | Writable |\n | | string, or ``None`` if | |\n | | unavailable | |\n +---------------------------+---------------------------------+-------------+\n | ``__name__`` | The function\'s name | Writable |\n +---------------------------+---------------------------------+-------------+\n | ``__qualname__`` | The function\'s *qualified name* | Writable |\n | | New in version 3.3. | |\n +---------------------------+---------------------------------+-------------+\n | ``__module__`` | The name of the module the | Writable |\n | | function was defined in, or | |\n | | ``None`` if unavailable. 
| |\n +---------------------------+---------------------------------+-------------+\n | ``__defaults__`` | A tuple containing default | Writable |\n | | argument values for those | |\n | | arguments that have defaults, | |\n | | or ``None`` if no arguments | |\n | | have a default value | |\n +---------------------------+---------------------------------+-------------+\n | ``__code__`` | The code object representing | Writable |\n | | the compiled function body. | |\n +---------------------------+---------------------------------+-------------+\n | ``__globals__`` | A reference to the dictionary | Read-only |\n | | that holds the function\'s | |\n | | global variables --- the global | |\n | | namespace of the module in | |\n | | which the function was defined. | |\n +---------------------------+---------------------------------+-------------+\n | ``__dict__`` | The namespace supporting | Writable |\n | | arbitrary function attributes. | |\n +---------------------------+---------------------------------+-------------+\n | ``__closure__`` | ``None`` or a tuple of cells | Read-only |\n | | that contain bindings for the | |\n | | function\'s free variables. | |\n +---------------------------+---------------------------------+-------------+\n | ``__annotations__`` | A dict containing annotations | Writable |\n | | of parameters. The keys of the | |\n | | dict are the parameter names, | |\n | | or ``\'return\'`` for the return | |\n | | annotation, if provided. | |\n +---------------------------+---------------------------------+-------------+\n | ``__kwdefaults__`` | A dict containing defaults for | Writable |\n | | keyword-only parameters. | |\n +---------------------------+---------------------------------+-------------+\n\n Most of the attributes labelled "Writable" check the type of the\n assigned value.\n\n Function objects also support getting and setting arbitrary\n attributes, which can be used, for example, to attach metadata\n to functions. 
Regular attribute dot-notation is used to get and\n set such attributes. *Note that the current implementation only\n supports function attributes on user-defined functions. Function\n attributes on built-in functions may be supported in the\n future.*\n\n Additional information about a function\'s definition can be\n retrieved from its code object; see the description of internal\n types below.\n\n Instance methods\n An instance method object combines a class, a class instance and\n any callable object (normally a user-defined function).\n\n Special read-only attributes: ``__self__`` is the class instance\n object, ``__func__`` is the function object; ``__doc__`` is the\n method\'s documentation (same as ``__func__.__doc__``);\n ``__name__`` is the method name (same as ``__func__.__name__``);\n ``__module__`` is the name of the module the method was defined\n in, or ``None`` if unavailable.\n\n Methods also support accessing (but not setting) the arbitrary\n function attributes on the underlying function object.\n\n User-defined method objects may be created when getting an\n attribute of a class (perhaps via an instance of that class), if\n that attribute is a user-defined function object or a class\n method object.\n\n When an instance method object is created by retrieving a user-\n defined function object from a class via one of its instances,\n its ``__self__`` attribute is the instance, and the method\n object is said to be bound. 
The new method\'s ``__func__``\n attribute is the original function object.\n\n When a user-defined method object is created by retrieving\n another method object from a class or instance, the behaviour is\n the same as for a function object, except that the ``__func__``\n attribute of the new instance is not the original method object\n but its ``__func__`` attribute.\n\n When an instance method object is created by retrieving a class\n method object from a class or instance, its ``__self__``\n attribute is the class itself, and its ``__func__`` attribute is\n the function object underlying the class method.\n\n When an instance method object is called, the underlying\n function (``__func__``) is called, inserting the class instance\n (``__self__``) in front of the argument list. For instance,\n when ``C`` is a class which contains a definition for a function\n ``f()``, and ``x`` is an instance of ``C``, calling ``x.f(1)``\n is equivalent to calling ``C.f(x, 1)``.\n\n When an instance method object is derived from a class method\n object, the "class instance" stored in ``__self__`` will\n actually be the class itself, so that calling either ``x.f(1)``\n or ``C.f(1)`` is equivalent to calling ``f(C,1)`` where ``f`` is\n the underlying function.\n\n Note that the transformation from function object to instance\n method object happens each time the attribute is retrieved from\n the instance. In some cases, a fruitful optimization is to\n assign the attribute to a local variable and call that local\n variable. Also notice that this transformation only happens for\n user-defined functions; other callable objects (and all non-\n callable objects) are retrieved without transformation. 
It is\n also important to note that user-defined functions which are\n attributes of a class instance are not converted to bound\n methods; this *only* happens when the function is an attribute\n of the class.\n\n Generator functions\n A function or method which uses the ``yield`` statement (see\n section *The yield statement*) is called a *generator function*.\n Such a function, when called, always returns an iterator object\n which can be used to execute the body of the function: calling\n the iterator\'s ``__next__()`` method will cause the function to\n execute until it provides a value using the ``yield`` statement.\n When the function executes a ``return`` statement or falls off\n the end, a ``StopIteration`` exception is raised and the\n iterator will have reached the end of the set of values to be\n returned.\n\n Built-in functions\n A built-in function object is a wrapper around a C function.\n Examples of built-in functions are ``len()`` and ``math.sin()``\n (``math`` is a standard built-in module). The number and type of\n the arguments are determined by the C function. Special read-\n only attributes: ``__doc__`` is the function\'s documentation\n string, or ``None`` if unavailable; ``__name__`` is the\n function\'s name; ``__self__`` is set to ``None`` (but see the\n next item); ``__module__`` is the name of the module the\n function was defined in or ``None`` if unavailable.\n\n Built-in methods\n This is really a different disguise of a built-in function, this\n time containing an object passed to the C function as an\n implicit extra argument. An example of a built-in method is\n ``alist.append()``, assuming *alist* is a list object. In this\n case, the special read-only attribute ``__self__`` is set to the\n object denoted by *alist*.\n\n Classes\n Classes are callable. These objects normally act as factories\n for new instances of themselves, but variations are possible for\n class types that override ``__new__()``. 
The arguments of the\n call are passed to ``__new__()`` and, in the typical case, to\n ``__init__()`` to initialize the new instance.\n\n Class Instances\n Instances of arbitrary classes can be made callable by defining\n a ``__call__()`` method in their class.\n\nModules\n Modules are a basic organizational unit of Python code, and are\n created by the *import system* as invoked either by the ``import``\n statement (see ``import``), or by calling functions such as\n ``importlib.import_module()`` and built-in ``__import__()``. A\n module object has a namespace implemented by a dictionary object\n (this is the dictionary referenced by the ``__globals__`` attribute\n of functions defined in the module). Attribute references are\n translated to lookups in this dictionary, e.g., ``m.x`` is\n equivalent to ``m.__dict__["x"]``. A module object does not contain\n the code object used to initialize the module (since it isn\'t\n needed once the initialization is done).\n\n Attribute assignment updates the module\'s namespace dictionary,\n e.g., ``m.x = 1`` is equivalent to ``m.__dict__["x"] = 1``.\n\n Special read-only attribute: ``__dict__`` is the module\'s namespace\n as a dictionary object.\n\n **CPython implementation detail:** Because of the way CPython\n clears module dictionaries, the module dictionary will be cleared\n when the module falls out of scope even if the dictionary still has\n live references. To avoid this, copy the dictionary or keep the\n module around while using its dictionary directly.\n\n Predefined (writable) attributes: ``__name__`` is the module\'s\n name; ``__doc__`` is the module\'s documentation string, or ``None``\n if unavailable; ``__file__`` is the pathname of the file from which\n the module was loaded, if it was loaded from a file. 
The\n ``__file__`` attribute may be missing for certain types of modules,\n such as C modules that are statically linked into the interpreter;\n for extension modules loaded dynamically from a shared library, it\n is the pathname of the shared library file.\n\nCustom classes\n Custom class types are typically created by class definitions (see\n section *Class definitions*). A class has a namespace implemented\n by a dictionary object. Class attribute references are translated\n to lookups in this dictionary, e.g., ``C.x`` is translated to\n ``C.__dict__["x"]`` (although there are a number of hooks which\n allow for other means of locating attributes). When the attribute\n name is not found there, the attribute search continues in the base\n classes. This search of the base classes uses the C3 method\n resolution order which behaves correctly even in the presence of\n \'diamond\' inheritance structures where there are multiple\n inheritance paths leading back to a common ancestor. Additional\n details on the C3 MRO used by Python can be found in the\n documentation accompanying the 2.3 release at\n http://www.python.org/download/releases/2.3/mro/.\n\n When a class attribute reference (for class ``C``, say) would yield\n a class method object, it is transformed into an instance method\n object whose ``__self__`` attribute is ``C``. When it would yield\n a static method object, it is transformed into the object wrapped\n by the static method object. 
See section *Implementing Descriptors*\n for another way in which attributes retrieved from a class may\n differ from those actually contained in its ``__dict__``.\n\n Class attribute assignments update the class\'s dictionary, never\n the dictionary of a base class.\n\n A class object can be called (see above) to yield a class instance\n (see below).\n\n Special attributes: ``__name__`` is the class name; ``__module__``\n is the module name in which the class was defined; ``__dict__`` is\n the dictionary containing the class\'s namespace; ``__bases__`` is a\n tuple (possibly empty or a singleton) containing the base classes,\n in the order of their occurrence in the base class list;\n ``__doc__`` is the class\'s documentation string, or None if\n undefined.\n\nClass instances\n A class instance is created by calling a class object (see above).\n A class instance has a namespace implemented as a dictionary which\n is the first place in which attribute references are searched.\n When an attribute is not found there, and the instance\'s class has\n an attribute by that name, the search continues with the class\n attributes. If a class attribute is found that is a user-defined\n function object, it is transformed into an instance method object\n whose ``__self__`` attribute is the instance. Static method and\n class method objects are also transformed; see above under\n "Classes". See section *Implementing Descriptors* for another way\n in which attributes of a class retrieved via its instances may\n differ from the objects actually stored in the class\'s\n ``__dict__``. If no class attribute is found, and the object\'s\n class has a ``__getattr__()`` method, that is called to satisfy the\n lookup.\n\n Attribute assignments and deletions update the instance\'s\n dictionary, never a class\'s dictionary. 
If the class has a\n ``__setattr__()`` or ``__delattr__()`` method, this is called\n instead of updating the instance dictionary directly.\n\n Class instances can pretend to be numbers, sequences, or mappings\n if they have methods with certain special names. See section\n *Special method names*.\n\n Special attributes: ``__dict__`` is the attribute dictionary;\n ``__class__`` is the instance\'s class.\n\nI/O objects (also known as file objects)\n A *file object* represents an open file. Various shortcuts are\n available to create file objects: the ``open()`` built-in function,\n and also ``os.popen()``, ``os.fdopen()``, and the ``makefile()``\n method of socket objects (and perhaps by other functions or methods\n provided by extension modules).\n\n The objects ``sys.stdin``, ``sys.stdout`` and ``sys.stderr`` are\n initialized to file objects corresponding to the interpreter\'s\n standard input, output and error streams; they are all open in text\n mode and therefore follow the interface defined by the\n ``io.TextIOBase`` abstract class.\n\nInternal types\n A few types used internally by the interpreter are exposed to the\n user. Their definitions may change with future versions of the\n interpreter, but they are mentioned here for completeness.\n\n Code objects\n Code objects represent *byte-compiled* executable Python code,\n or *bytecode*. The difference between a code object and a\n function object is that the function object contains an explicit\n reference to the function\'s globals (the module in which it was\n defined), while a code object contains no context; also the\n default argument values are stored in the function object, not\n in the code object (because they represent values calculated at\n run-time). 
Unlike function objects, code objects are immutable\n and contain no references (directly or indirectly) to mutable\n objects.\n\n Special read-only attributes: ``co_name`` gives the function\n name; ``co_argcount`` is the number of positional arguments\n (including arguments with default values); ``co_nlocals`` is the\n number of local variables used by the function (including\n arguments); ``co_varnames`` is a tuple containing the names of\n the local variables (starting with the argument names);\n ``co_cellvars`` is a tuple containing the names of local\n variables that are referenced by nested functions;\n ``co_freevars`` is a tuple containing the names of free\n variables; ``co_code`` is a string representing the sequence of\n bytecode instructions; ``co_consts`` is a tuple containing the\n literals used by the bytecode; ``co_names`` is a tuple\n containing the names used by the bytecode; ``co_filename`` is\n the filename from which the code was compiled;\n ``co_firstlineno`` is the first line number of the function;\n ``co_lnotab`` is a string encoding the mapping from bytecode\n offsets to line numbers (for details see the source code of the\n interpreter); ``co_stacksize`` is the required stack size\n (including local variables); ``co_flags`` is an integer encoding\n a number of flags for the interpreter.\n\n The following flag bits are defined for ``co_flags``: bit\n ``0x04`` is set if the function uses the ``*arguments`` syntax\n to accept an arbitrary number of positional arguments; bit\n ``0x08`` is set if the function uses the ``**keywords`` syntax\n to accept arbitrary keyword arguments; bit ``0x20`` is set if\n the function is a generator.\n\n Future feature declarations (``from __future__ import\n division``) also use bits in ``co_flags`` to indicate whether a\n code object was compiled with a particular feature enabled: bit\n ``0x2000`` is set if the function was compiled with future\n division enabled; bits ``0x10`` and ``0x1000`` were used in\n 
earlier versions of Python.\n\n Other bits in ``co_flags`` are reserved for internal use.\n\n If a code object represents a function, the first item in\n ``co_consts`` is the documentation string of the function, or\n ``None`` if undefined.\n\n Frame objects\n Frame objects represent execution frames. They may occur in\n traceback objects (see below).\n\n Special read-only attributes: ``f_back`` points to the previous\n stack frame (towards the caller), or ``None`` if this is the\n bottom stack frame; ``f_code`` is the code object being executed\n in this frame; ``f_locals`` is the dictionary used to look up\n local variables; ``f_globals`` is used for global variables;\n ``f_builtins`` is used for built-in (intrinsic) names;\n ``f_lasti`` gives the precise instruction (this is an index into\n the bytecode string of the code object).\n\n Special writable attributes: ``f_trace``, if not ``None``, is a\n function called at the start of each source code line (this is\n used by the debugger); ``f_lineno`` is the current line number\n of the frame --- writing to this from within a trace function\n jumps to the given line (only for the bottom-most frame). A\n debugger can implement a Jump command (aka Set Next Statement)\n by writing to ``f_lineno``.\n\n Traceback objects\n Traceback objects represent a stack trace of an exception. A\n traceback object is created when an exception occurs. When the\n search for an exception handler unwinds the execution stack, at\n each unwound level a traceback object is inserted in front of\n the current traceback. When an exception handler is entered,\n the stack trace is made available to the program. (See section\n *The try statement*.) It is accessible as the third item of the\n tuple returned by ``sys.exc_info()``. 
When the program contains\n no suitable handler, the stack trace is written (nicely\n formatted) to the standard error stream; if the interpreter is\n interactive, it is also made available to the user as\n ``sys.last_traceback``.\n\n Special read-only attributes: ``tb_next`` is the next level in\n the stack trace (towards the frame where the exception\n occurred), or ``None`` if there is no next level; ``tb_frame``\n points to the execution frame of the current level;\n ``tb_lineno`` gives the line number where the exception\n occurred; ``tb_lasti`` indicates the precise instruction. The\n line number and last instruction in the traceback may differ\n from the line number of its frame object if the exception\n occurred in a ``try`` statement with no matching except clause\n or with a finally clause.\n\n Slice objects\n Slice objects are used to represent slices for ``__getitem__()``\n methods. They are also created by the built-in ``slice()``\n function.\n\n Special read-only attributes: ``start`` is the lower bound;\n ``stop`` is the upper bound; ``step`` is the step value; each is\n ``None`` if omitted. These attributes can have any type.\n\n Slice objects support one method:\n\n slice.indices(self, length)\n\n This method takes a single integer argument *length* and\n computes information about the slice that the slice object\n would describe if applied to a sequence of *length* items.\n It returns a tuple of three integers; respectively these are\n the *start* and *stop* indices and the *step* or stride\n length of the slice. Missing or out-of-bounds indices are\n handled in a manner consistent with regular slices.\n\n Static method objects\n Static method objects provide a way of defeating the\n transformation of function objects to method objects described\n above. A static method object is a wrapper around any other\n object, usually a user-defined method object. 
When a static\n method object is retrieved from a class or a class instance, the\n object actually returned is the wrapped object, which is not\n subject to any further transformation. Static method objects are\n not themselves callable, although the objects they wrap usually\n are. Static method objects are created by the built-in\n ``staticmethod()`` constructor.\n\n Class method objects\n A class method object, like a static method object, is a wrapper\n around another object that alters the way in which that object\n is retrieved from classes and class instances. The behaviour of\n class method objects upon such retrieval is described above,\n under "User-defined methods". Class method objects are created\n by the built-in ``classmethod()`` constructor.\n',
'typesfunctions': '\nFunctions\n*********\n\nFunction objects are created by function definitions. The only\noperation on a function object is to call it: ``func(argument-list)``.\n\nThere are really two flavors of function objects: built-in functions\nand user-defined functions. Both support the same operation (to call\nthe function), but the implementation is different, hence the\ndifferent object types.\n\nSee *Function definitions* for more information.\n',
- 'typesmapping': '\nMapping Types --- ``dict``\n**************************\n\nA *mapping* object maps *hashable* values to arbitrary objects.\nMappings are mutable objects. There is currently only one standard\nmapping type, the *dictionary*. (For other containers see the built\nin ``list``, ``set``, and ``tuple`` classes, and the ``collections``\nmodule.)\n\nA dictionary\'s keys are *almost* arbitrary values. Values that are\nnot *hashable*, that is, values containing lists, dictionaries or\nother mutable types (that are compared by value rather than by object\nidentity) may not be used as keys. Numeric types used for keys obey\nthe normal rules for numeric comparison: if two numbers compare equal\n(such as ``1`` and ``1.0``) then they can be used interchangeably to\nindex the same dictionary entry. (Note however, that since computers\nstore floating-point numbers as approximations it is usually unwise to\nuse them as dictionary keys.)\n\nDictionaries can be created by placing a comma-separated list of\n``key: value`` pairs within braces, for example: ``{\'jack\': 4098,\n\'sjoerd\': 4127}`` or ``{4098: \'jack\', 4127: \'sjoerd\'}``, or by the\n``dict`` constructor.\n\nclass class dict([arg])\n\n Return a new dictionary initialized from an optional positional\n argument or from a set of keyword arguments. If no arguments are\n given, return a new empty dictionary. If the positional argument\n *arg* is a mapping object, return a dictionary mapping the same\n keys to the same values as does the mapping object. Otherwise the\n positional argument must be a sequence, a container that supports\n iteration, or an iterator object. The elements of the argument\n must each also be of one of those kinds, and each must in turn\n contain exactly two objects. The first is used as a key in the new\n dictionary, and the second as the key\'s value. 
If a given key is\n seen more than once, the last value associated with it is retained\n in the new dictionary.\n\n If keyword arguments are given, the keywords themselves with their\n associated values are added as items to the dictionary. If a key\n is specified both in the positional argument and as a keyword\n argument, the value associated with the keyword is retained in the\n dictionary. For example, these all return a dictionary equal to\n ``{"one": 1, "two": 2}``:\n\n * ``dict(one=1, two=2)``\n\n * ``dict({\'one\': 1, \'two\': 2})``\n\n * ``dict(zip((\'one\', \'two\'), (1, 2)))``\n\n * ``dict([[\'two\', 2], [\'one\', 1]])``\n\n The first example only works for keys that are valid Python\n identifiers; the others work with any valid keys.\n\n These are the operations that dictionaries support (and therefore,\n custom mapping types should support too):\n\n len(d)\n\n Return the number of items in the dictionary *d*.\n\n d[key]\n\n Return the item of *d* with key *key*. Raises a ``KeyError`` if\n *key* is not in the map.\n\n If a subclass of dict defines a method ``__missing__()``, if the\n key *key* is not present, the ``d[key]`` operation calls that\n method with the key *key* as argument. The ``d[key]`` operation\n then returns or raises whatever is returned or raised by the\n ``__missing__(key)`` call if the key is not present. No other\n operations or methods invoke ``__missing__()``. If\n ``__missing__()`` is not defined, ``KeyError`` is raised.\n ``__missing__()`` must be a method; it cannot be an instance\n variable:\n\n >>> class Counter(dict):\n ... def __missing__(self, key):\n ... return 0\n >>> c = Counter()\n >>> c[\'red\']\n 0\n >>> c[\'red\'] += 1\n >>> c[\'red\']\n 1\n\n See ``collections.Counter`` for a complete implementation\n including other methods helpful for accumulating and managing\n tallies.\n\n d[key] = value\n\n Set ``d[key]`` to *value*.\n\n del d[key]\n\n Remove ``d[key]`` from *d*. 
Raises a ``KeyError`` if *key* is\n not in the map.\n\n key in d\n\n Return ``True`` if *d* has a key *key*, else ``False``.\n\n key not in d\n\n Equivalent to ``not key in d``.\n\n iter(d)\n\n Return an iterator over the keys of the dictionary. This is a\n shortcut for ``iter(d.keys())``.\n\n clear()\n\n Remove all items from the dictionary.\n\n copy()\n\n Return a shallow copy of the dictionary.\n\n classmethod fromkeys(seq[, value])\n\n Create a new dictionary with keys from *seq* and values set to\n *value*.\n\n ``fromkeys()`` is a class method that returns a new dictionary.\n *value* defaults to ``None``.\n\n get(key[, default])\n\n Return the value for *key* if *key* is in the dictionary, else\n *default*. If *default* is not given, it defaults to ``None``,\n so that this method never raises a ``KeyError``.\n\n items()\n\n Return a new view of the dictionary\'s items (``(key, value)``\n pairs). See below for documentation of view objects.\n\n keys()\n\n Return a new view of the dictionary\'s keys. See below for\n documentation of view objects.\n\n pop(key[, default])\n\n If *key* is in the dictionary, remove it and return its value,\n else return *default*. If *default* is not given and *key* is\n not in the dictionary, a ``KeyError`` is raised.\n\n popitem()\n\n Remove and return an arbitrary ``(key, value)`` pair from the\n dictionary.\n\n ``popitem()`` is useful to destructively iterate over a\n dictionary, as often used in set algorithms. If the dictionary\n is empty, calling ``popitem()`` raises a ``KeyError``.\n\n setdefault(key[, default])\n\n If *key* is in the dictionary, return its value. If not, insert\n *key* with a value of *default* and return *default*. *default*\n defaults to ``None``.\n\n update([other])\n\n Update the dictionary with the key/value pairs from *other*,\n overwriting existing keys. 
Return ``None``.\n\n ``update()`` accepts either another dictionary object or an\n iterable of key/value pairs (as tuples or other iterables of\n length two). If keyword arguments are specified, the dictionary\n is then updated with those key/value pairs: ``d.update(red=1,\n blue=2)``.\n\n values()\n\n Return a new view of the dictionary\'s values. See below for\n documentation of view objects.\n\n\nDictionary view objects\n=======================\n\nThe objects returned by ``dict.keys()``, ``dict.values()`` and\n``dict.items()`` are *view objects*. They provide a dynamic view on\nthe dictionary\'s entries, which means that when the dictionary\nchanges, the view reflects these changes.\n\nDictionary views can be iterated over to yield their respective data,\nand support membership tests:\n\nlen(dictview)\n\n Return the number of entries in the dictionary.\n\niter(dictview)\n\n Return an iterator over the keys, values or items (represented as\n tuples of ``(key, value)``) in the dictionary.\n\n Keys and values are iterated over in an arbitrary order which is\n non-random, varies across Python implementations, and depends on\n the dictionary\'s history of insertions and deletions. If keys,\n values and items views are iterated over with no intervening\n modifications to the dictionary, the order of items will directly\n correspond. This allows the creation of ``(value, key)`` pairs\n using ``zip()``: ``pairs = zip(d.values(), d.keys())``. 
Another\n way to create the same list is ``pairs = [(v, k) for (k, v) in\n d.items()]``.\n\n Iterating views while adding or deleting entries in the dictionary\n may raise a ``RuntimeError`` or fail to iterate over all entries.\n\nx in dictview\n\n Return ``True`` if *x* is in the underlying dictionary\'s keys,\n values or items (in the latter case, *x* should be a ``(key,\n value)`` tuple).\n\nKeys views are set-like since their entries are unique and hashable.\nIf all values are hashable, so that ``(key, value)`` pairs are unique\nand hashable, then the items view is also set-like. (Values views are\nnot treated as set-like since the entries are generally not unique.)\nFor set-like views, all of the operations defined for the abstract\nbase class ``collections.Set`` are available (for example, ``==``,\n``<``, or ``^``).\n\nAn example of dictionary view usage:\n\n >>> dishes = {\'eggs\': 2, \'sausage\': 1, \'bacon\': 1, \'spam\': 500}\n >>> keys = dishes.keys()\n >>> values = dishes.values()\n\n >>> # iteration\n >>> n = 0\n >>> for val in values:\n ... n += val\n >>> print(n)\n 504\n\n >>> # keys and values are iterated over in the same order\n >>> list(keys)\n [\'eggs\', \'bacon\', \'sausage\', \'spam\']\n >>> list(values)\n [2, 1, 1, 500]\n\n >>> # view objects are dynamic and reflect dict changes\n >>> del dishes[\'eggs\']\n >>> del dishes[\'sausage\']\n >>> list(keys)\n [\'spam\', \'bacon\']\n\n >>> # set operations\n >>> keys & {\'eggs\', \'bacon\', \'salad\'}\n {\'bacon\'}\n >>> keys ^ {\'sausage\', \'juice\'}\n {\'juice\', \'sausage\', \'bacon\', \'spam\'}\n',
+ 'typesmapping': '\nMapping Types --- ``dict``\n**************************\n\nA *mapping* object maps *hashable* values to arbitrary objects.\nMappings are mutable objects. There is currently only one standard\nmapping type, the *dictionary*. (For other containers see the built-\nin ``list``, ``set``, and ``tuple`` classes, and the ``collections``\nmodule.)\n\nA dictionary\'s keys are *almost* arbitrary values. Values that are\nnot *hashable*, that is, values containing lists, dictionaries or\nother mutable types (that are compared by value rather than by object\nidentity) may not be used as keys. Numeric types used for keys obey\nthe normal rules for numeric comparison: if two numbers compare equal\n(such as ``1`` and ``1.0``) then they can be used interchangeably to\nindex the same dictionary entry. (Note however, that since computers\nstore floating-point numbers as approximations it is usually unwise to\nuse them as dictionary keys.)\n\nDictionaries can be created by placing a comma-separated list of\n``key: value`` pairs within braces, for example: ``{\'jack\': 4098,\n\'sjoerd\': 4127}`` or ``{4098: \'jack\', 4127: \'sjoerd\'}``, or by the\n``dict`` constructor.\n\nclass class dict([arg])\n\n Return a new dictionary initialized from an optional positional\n argument or from a set of keyword arguments. If no arguments are\n given, return a new empty dictionary. If the positional argument\n *arg* is a mapping object, return a dictionary mapping the same\n keys to the same values as does the mapping object. Otherwise the\n positional argument must be a sequence, a container that supports\n iteration, or an iterator object. The elements of the argument\n must each also be of one of those kinds, and each must in turn\n contain exactly two objects. The first is used as a key in the new\n dictionary, and the second as the key\'s value. 
If a given key is\n seen more than once, the last value associated with it is retained\n in the new dictionary.\n\n If keyword arguments are given, the keywords themselves with their\n associated values are added as items to the dictionary. If a key\n is specified both in the positional argument and as a keyword\n argument, the value associated with the keyword is retained in the\n dictionary. For example, these all return a dictionary equal to\n ``{"one": 1, "two": 2}``:\n\n * ``dict(one=1, two=2)``\n\n * ``dict({\'one\': 1, \'two\': 2})``\n\n * ``dict(zip((\'one\', \'two\'), (1, 2)))``\n\n * ``dict([[\'two\', 2], [\'one\', 1]])``\n\n The first example only works for keys that are valid Python\n identifiers; the others work with any valid keys.\n\n These are the operations that dictionaries support (and therefore,\n custom mapping types should support too):\n\n len(d)\n\n Return the number of items in the dictionary *d*.\n\n d[key]\n\n Return the item of *d* with key *key*. Raises a ``KeyError`` if\n *key* is not in the map.\n\n If a subclass of dict defines a method ``__missing__()``, if the\n key *key* is not present, the ``d[key]`` operation calls that\n method with the key *key* as argument. The ``d[key]`` operation\n then returns or raises whatever is returned or raised by the\n ``__missing__(key)`` call if the key is not present. No other\n operations or methods invoke ``__missing__()``. If\n ``__missing__()`` is not defined, ``KeyError`` is raised.\n ``__missing__()`` must be a method; it cannot be an instance\n variable:\n\n >>> class Counter(dict):\n ... def __missing__(self, key):\n ... return 0\n >>> c = Counter()\n >>> c[\'red\']\n 0\n >>> c[\'red\'] += 1\n >>> c[\'red\']\n 1\n\n See ``collections.Counter`` for a complete implementation\n including other methods helpful for accumulating and managing\n tallies.\n\n d[key] = value\n\n Set ``d[key]`` to *value*.\n\n del d[key]\n\n Remove ``d[key]`` from *d*. 
Raises a ``KeyError`` if *key* is\n not in the map.\n\n key in d\n\n Return ``True`` if *d* has a key *key*, else ``False``.\n\n key not in d\n\n Equivalent to ``not key in d``.\n\n iter(d)\n\n Return an iterator over the keys of the dictionary. This is a\n shortcut for ``iter(d.keys())``.\n\n clear()\n\n Remove all items from the dictionary.\n\n copy()\n\n Return a shallow copy of the dictionary.\n\n classmethod fromkeys(seq[, value])\n\n Create a new dictionary with keys from *seq* and values set to\n *value*.\n\n ``fromkeys()`` is a class method that returns a new dictionary.\n *value* defaults to ``None``.\n\n get(key[, default])\n\n Return the value for *key* if *key* is in the dictionary, else\n *default*. If *default* is not given, it defaults to ``None``,\n so that this method never raises a ``KeyError``.\n\n items()\n\n Return a new view of the dictionary\'s items (``(key, value)``\n pairs). See the *documentation of view objects*.\n\n keys()\n\n Return a new view of the dictionary\'s keys. See the\n *documentation of view objects*.\n\n pop(key[, default])\n\n If *key* is in the dictionary, remove it and return its value,\n else return *default*. If *default* is not given and *key* is\n not in the dictionary, a ``KeyError`` is raised.\n\n popitem()\n\n Remove and return an arbitrary ``(key, value)`` pair from the\n dictionary.\n\n ``popitem()`` is useful to destructively iterate over a\n dictionary, as often used in set algorithms. If the dictionary\n is empty, calling ``popitem()`` raises a ``KeyError``.\n\n setdefault(key[, default])\n\n If *key* is in the dictionary, return its value. If not, insert\n *key* with a value of *default* and return *default*. *default*\n defaults to ``None``.\n\n update([other])\n\n Update the dictionary with the key/value pairs from *other*,\n overwriting existing keys. 
Return ``None``.\n\n ``update()`` accepts either another dictionary object or an\n iterable of key/value pairs (as tuples or other iterables of\n length two). If keyword arguments are specified, the dictionary\n is then updated with those key/value pairs: ``d.update(red=1,\n blue=2)``.\n\n values()\n\n Return a new view of the dictionary\'s values. See the\n *documentation of view objects*.\n\nSee also:\n\n ``types.MappingProxyType`` can be used to create a read-only view\n of a ``dict``.\n\n\nDictionary view objects\n=======================\n\nThe objects returned by ``dict.keys()``, ``dict.values()`` and\n``dict.items()`` are *view objects*. They provide a dynamic view on\nthe dictionary\'s entries, which means that when the dictionary\nchanges, the view reflects these changes.\n\nDictionary views can be iterated over to yield their respective data,\nand support membership tests:\n\nlen(dictview)\n\n Return the number of entries in the dictionary.\n\niter(dictview)\n\n Return an iterator over the keys, values or items (represented as\n tuples of ``(key, value)``) in the dictionary.\n\n Keys and values are iterated over in an arbitrary order which is\n non-random, varies across Python implementations, and depends on\n the dictionary\'s history of insertions and deletions. If keys,\n values and items views are iterated over with no intervening\n modifications to the dictionary, the order of items will directly\n correspond. This allows the creation of ``(value, key)`` pairs\n using ``zip()``: ``pairs = zip(d.values(), d.keys())``. 
Another\n way to create the same list is ``pairs = [(v, k) for (k, v) in\n d.items()]``.\n\n Iterating views while adding or deleting entries in the dictionary\n may raise a ``RuntimeError`` or fail to iterate over all entries.\n\nx in dictview\n\n Return ``True`` if *x* is in the underlying dictionary\'s keys,\n values or items (in the latter case, *x* should be a ``(key,\n value)`` tuple).\n\nKeys views are set-like since their entries are unique and hashable.\nIf all values are hashable, so that ``(key, value)`` pairs are unique\nand hashable, then the items view is also set-like. (Values views are\nnot treated as set-like since the entries are generally not unique.)\nFor set-like views, all of the operations defined for the abstract\nbase class ``collections.abc.Set`` are available (for example, ``==``,\n``<``, or ``^``).\n\nAn example of dictionary view usage:\n\n >>> dishes = {\'eggs\': 2, \'sausage\': 1, \'bacon\': 1, \'spam\': 500}\n >>> keys = dishes.keys()\n >>> values = dishes.values()\n\n >>> # iteration\n >>> n = 0\n >>> for val in values:\n ... n += val\n >>> print(n)\n 504\n\n >>> # keys and values are iterated over in the same order\n >>> list(keys)\n [\'eggs\', \'bacon\', \'sausage\', \'spam\']\n >>> list(values)\n [2, 1, 1, 500]\n\n >>> # view objects are dynamic and reflect dict changes\n >>> del dishes[\'eggs\']\n >>> del dishes[\'sausage\']\n >>> list(keys)\n [\'spam\', \'bacon\']\n\n >>> # set operations\n >>> keys & {\'eggs\', \'bacon\', \'salad\'}\n {\'bacon\'}\n >>> keys ^ {\'sausage\', \'juice\'}\n {\'juice\', \'sausage\', \'bacon\', \'spam\'}\n',
'typesmethods': "\nMethods\n*******\n\nMethods are functions that are called using the attribute notation.\nThere are two flavors: built-in methods (such as ``append()`` on\nlists) and class instance methods. Built-in methods are described\nwith the types that support them.\n\nIf you access a method (a function defined in a class namespace)\nthrough an instance, you get a special object: a *bound method* (also\ncalled *instance method*) object. When called, it will add the\n``self`` argument to the argument list. Bound methods have two\nspecial read-only attributes: ``m.__self__`` is the object on which\nthe method operates, and ``m.__func__`` is the function implementing\nthe method. Calling ``m(arg-1, arg-2, ..., arg-n)`` is completely\nequivalent to calling ``m.__func__(m.__self__, arg-1, arg-2, ...,\narg-n)``.\n\nLike function objects, bound method objects support getting arbitrary\nattributes. However, since method attributes are actually stored on\nthe underlying function object (``meth.__func__``), setting method\nattributes on bound methods is disallowed. Attempting to set a method\nattribute results in a ``TypeError`` being raised. In order to set a\nmethod attribute, you need to explicitly set it on the underlying\nfunction object:\n\n class C:\n def method(self):\n pass\n\n c = C()\n c.method.__func__.whoami = 'my name is c'\n\nSee *The standard type hierarchy* for more information.\n",
'typesmodules': "\nModules\n*******\n\nThe only special operation on a module is attribute access:\n``m.name``, where *m* is a module and *name* accesses a name defined\nin *m*'s symbol table. Module attributes can be assigned to. (Note\nthat the ``import`` statement is not, strictly speaking, an operation\non a module object; ``import foo`` does not require a module object\nnamed *foo* to exist, rather it requires an (external) *definition*\nfor a module named *foo* somewhere.)\n\nA special attribute of every module is ``__dict__``. This is the\ndictionary containing the module's symbol table. Modifying this\ndictionary will actually change the module's symbol table, but direct\nassignment to the ``__dict__`` attribute is not possible (you can\nwrite ``m.__dict__['a'] = 1``, which defines ``m.a`` to be ``1``, but\nyou can't write ``m.__dict__ = {}``). Modifying ``__dict__`` directly\nis not recommended.\n\nModules built into the interpreter are written like this: ``<module\n'sys' (built-in)>``. If loaded from a file, they are written as\n``<module 'os' from '/usr/local/lib/pythonX.Y/os.pyc'>``.\n",
- 'typesseq': '\nSequence Types --- ``str``, ``bytes``, ``bytearray``, ``list``, ``tuple``, ``range``\n************************************************************************************\n\nThere are six sequence types: strings, byte sequences (``bytes``\nobjects), byte arrays (``bytearray`` objects), lists, tuples, and\nrange objects. For other containers see the built-in ``dict`` and\n``set`` classes, and the ``collections`` module.\n\nStrings contain Unicode characters. Their literals are written in\nsingle or double quotes: ``\'xyzzy\'``, ``"frobozz"``. See *String and\nBytes literals* for more about string literals. In addition to the\nfunctionality described here, there are also string-specific methods\ndescribed in the *String Methods* section.\n\nBytes and bytearray objects contain single bytes -- the former is\nimmutable while the latter is a mutable sequence. Bytes objects can\nbe constructed using the constructor, ``bytes()``, and from literals; use a\n``b`` prefix with normal string syntax: ``b\'xyzzy\'``. To construct\nbyte arrays, use the ``bytearray()`` function.\n\nWhile string objects are sequences of characters (represented by\nstrings of length 1), bytes and bytearray objects are sequences of\n*integers* (between 0 and 255), representing the ASCII value of single\nbytes. That means that for a bytes or bytearray object *b*, ``b[0]``\nwill be an integer, while ``b[0:1]`` will be a bytes or bytearray\nobject of length 1. The representation of bytes objects uses the\nliteral format (``b\'...\'``) since it is generally more useful than\ne.g. ``bytes([50, 19, 100])``. You can always convert a bytes object\ninto a list of integers using ``list(b)``.\n\nAlso, while in previous Python versions, byte strings and Unicode\nstrings could be exchanged for each other rather freely (barring\nencoding issues), strings and bytes are now completely separate\nconcepts. There\'s no implicit en-/decoding if you pass an object of\nthe wrong type. 
A string always compares unequal to a bytes or\nbytearray object.\n\nLists are constructed with square brackets, separating items with\ncommas: ``[a, b, c]``. Tuples are constructed by the comma operator\n(not within square brackets), with or without enclosing parentheses,\nbut an empty tuple must have the enclosing parentheses, such as ``a,\nb, c`` or ``()``. A single item tuple must have a trailing comma,\nsuch as ``(d,)``.\n\nObjects of type range are created using the ``range()`` function.\nThey don\'t support concatenation or repetition, and using ``min()`` or\n``max()`` on them is inefficient.\n\nMost sequence types support the following operations. The ``in`` and\n``not in`` operations have the same priorities as the comparison\noperations. The ``+`` and ``*`` operations have the same priority as\nthe corresponding numeric operations. [3] Additional methods are\nprovided for *Mutable Sequence Types*.\n\nThis table lists the sequence operations sorted in ascending priority\n(operations in the same box have the same priority). 
In the table,\n*s* and *t* are sequences of the same type; *n*, *i*, *j* and *k* are\nintegers.\n\n+--------------------+----------------------------------+------------+\n| Operation | Result | Notes |\n+====================+==================================+============+\n| ``x in s`` | ``True`` if an item of *s* is | (1) |\n| | equal to *x*, else ``False`` | |\n+--------------------+----------------------------------+------------+\n| ``x not in s`` | ``False`` if an item of *s* is | (1) |\n| | equal to *x*, else ``True`` | |\n+--------------------+----------------------------------+------------+\n| ``s + t`` | the concatenation of *s* and *t* | (6) |\n+--------------------+----------------------------------+------------+\n| ``s * n, n * s`` | *n* shallow copies of *s* | (2) |\n| | concatenated | |\n+--------------------+----------------------------------+------------+\n| ``s[i]`` | *i*th item of *s*, origin 0 | (3) |\n+--------------------+----------------------------------+------------+\n| ``s[i:j]`` | slice of *s* from *i* to *j* | (3)(4) |\n+--------------------+----------------------------------+------------+\n| ``s[i:j:k]`` | slice of *s* from *i* to *j* | (3)(5) |\n| | with step *k* | |\n+--------------------+----------------------------------+------------+\n| ``len(s)`` | length of *s* | |\n+--------------------+----------------------------------+------------+\n| ``min(s)`` | smallest item of *s* | |\n+--------------------+----------------------------------+------------+\n| ``max(s)`` | largest item of *s* | |\n+--------------------+----------------------------------+------------+\n| ``s.index(i)`` | index of the first occurrence of | |\n| | *i* in *s* | |\n+--------------------+----------------------------------+------------+\n| ``s.count(i)`` | total number of occurrences of | |\n| | *i* in *s* | |\n+--------------------+----------------------------------+------------+\n\nSequence types also support comparisons. 
In particular, tuples and\nlists are compared lexicographically by comparing corresponding\nelements. This means that to compare equal, every element must\ncompare equal and the two sequences must be of the same type and have\nthe same length. (For full details see *Comparisons* in the language\nreference.)\n\nNotes:\n\n1. When *s* is a string object, the ``in`` and ``not in`` operations\n act like a substring test.\n\n2. Values of *n* less than ``0`` are treated as ``0`` (which yields an\n empty sequence of the same type as *s*). Note also that the copies\n are shallow; nested structures are not copied. This often haunts\n new Python programmers; consider:\n\n >>> lists = [[]] * 3\n >>> lists\n [[], [], []]\n >>> lists[0].append(3)\n >>> lists\n [[3], [3], [3]]\n\n What has happened is that ``[[]]`` is a one-element list containing\n an empty list, so all three elements of ``[[]] * 3`` are (pointers\n to) this single empty list. Modifying any of the elements of\n ``lists`` modifies this single list. You can create a list of\n different lists this way:\n\n >>> lists = [[] for i in range(3)]\n >>> lists[0].append(3)\n >>> lists[1].append(5)\n >>> lists[2].append(7)\n >>> lists\n [[3], [5], [7]]\n\n3. If *i* or *j* is negative, the index is relative to the end of the\n string: ``len(s) + i`` or ``len(s) + j`` is substituted. But note\n that ``-0`` is still ``0``.\n\n4. The slice of *s* from *i* to *j* is defined as the sequence of\n items with index *k* such that ``i <= k < j``. If *i* or *j* is\n greater than ``len(s)``, use ``len(s)``. If *i* is omitted or\n ``None``, use ``0``. If *j* is omitted or ``None``, use\n ``len(s)``. If *i* is greater than or equal to *j*, the slice is\n empty.\n\n5. The slice of *s* from *i* to *j* with step *k* is defined as the\n sequence of items with index ``x = i + n*k`` such that ``0 <= n <\n (j-i)/k``. 
In other words, the indices are ``i``, ``i+k``,\n ``i+2*k``, ``i+3*k`` and so on, stopping when *j* is reached (but\n never including *j*). If *i* or *j* is greater than ``len(s)``,\n use ``len(s)``. If *i* or *j* are omitted or ``None``, they become\n "end" values (which end depends on the sign of *k*). Note, *k*\n cannot be zero. If *k* is ``None``, it is treated like ``1``.\n\n6. Concatenating immutable strings always results in a new object.\n This means that building up a string by repeated concatenation will\n have a quadratic runtime cost in the total string length. To get a\n linear runtime cost, you must switch to one of the alternatives\n below:\n\n * if concatenating ``str`` objects, you can build a list and use\n ``str.join()`` at the end;\n\n * if concatenating ``bytes`` objects, you can similarly use\n ``bytes.join()``, or you can do in-place concatenation with a\n ``bytearray`` object. ``bytearray`` objects are mutable and have\n an efficient overallocation mechanism.\n\n\nString Methods\n==============\n\nString objects support the methods listed below.\n\nIn addition, Python\'s strings support the sequence type methods\ndescribed in the *Sequence Types --- str, bytes, bytearray, list,\ntuple, range* section. To output formatted strings, see the *String\nFormatting* section. Also, see the ``re`` module for string functions\nbased on regular expressions.\n\nstr.capitalize()\n\n Return a copy of the string with its first character capitalized\n and the rest lowercased.\n\nstr.center(width[, fillchar])\n\n Return centered in a string of length *width*. Padding is done\n using the specified *fillchar* (default is a space).\n\nstr.count(sub[, start[, end]])\n\n Return the number of non-overlapping occurrences of substring *sub*\n in the range [*start*, *end*]. Optional arguments *start* and\n *end* are interpreted as in slice notation.\n\nstr.encode(encoding="utf-8", errors="strict")\n\n Return an encoded version of the string as a bytes object. 
Default\n encoding is ``\'utf-8\'``. *errors* may be given to set a different\n error handling scheme. The default for *errors* is ``\'strict\'``,\n meaning that encoding errors raise a ``UnicodeError``. Other\n possible values are ``\'ignore\'``, ``\'replace\'``,\n ``\'xmlcharrefreplace\'``, ``\'backslashreplace\'`` and any other name\n registered via ``codecs.register_error()``, see section *Codec Base\n Classes*. For a list of possible encodings, see section *Standard\n Encodings*.\n\n Changed in version 3.1: Support for keyword arguments added.\n\nstr.endswith(suffix[, start[, end]])\n\n Return ``True`` if the string ends with the specified *suffix*,\n otherwise return ``False``. *suffix* can also be a tuple of\n suffixes to look for. With optional *start*, test beginning at\n that position. With optional *end*, stop comparing at that\n position.\n\nstr.expandtabs([tabsize])\n\n Return a copy of the string where all tab characters are replaced\n by zero or more spaces, depending on the current column and the\n given tab size. The column number is reset to zero after each\n newline occurring in the string. If *tabsize* is not given, a tab\n size of ``8`` characters is assumed. This doesn\'t understand other\n non-printing characters or escape sequences.\n\nstr.find(sub[, start[, end]])\n\n Return the lowest index in the string where substring *sub* is\n found, such that *sub* is contained in the slice ``s[start:end]``.\n Optional arguments *start* and *end* are interpreted as in slice\n notation. Return ``-1`` if *sub* is not found.\n\n Note: The ``find()`` method should be used only if you need to know the\n position of *sub*. To check if *sub* is a substring or not, use\n the ``in`` operator:\n\n >>> \'Py\' in \'Python\'\n True\n\nstr.format(*args, **kwargs)\n\n Perform a string formatting operation. The string on which this\n method is called can contain literal text or replacement fields\n delimited by braces ``{}``. 
Each replacement field contains either\n the numeric index of a positional argument, or the name of a\n keyword argument. Returns a copy of the string where each\n replacement field is replaced with the string value of the\n corresponding argument.\n\n >>> "The sum of 1 + 2 is {0}".format(1+2)\n \'The sum of 1 + 2 is 3\'\n\n See *Format String Syntax* for a description of the various\n formatting options that can be specified in format strings.\n\nstr.format_map(mapping)\n\n Similar to ``str.format(**mapping)``, except that ``mapping`` is\n used directly and not copied to a ``dict`` . This is useful if for\n example ``mapping`` is a dict subclass:\n\n >>> class Default(dict):\n ... def __missing__(self, key):\n ... return key\n ...\n >>> \'{name} was born in {country}\'.format_map(Default(name=\'Guido\'))\n \'Guido was born in country\'\n\n New in version 3.2.\n\nstr.index(sub[, start[, end]])\n\n Like ``find()``, but raise ``ValueError`` when the substring is not\n found.\n\nstr.isalnum()\n\n Return true if all characters in the string are alphanumeric and\n there is at least one character, false otherwise. A character\n ``c`` is alphanumeric if one of the following returns ``True``:\n ``c.isalpha()``, ``c.isdecimal()``, ``c.isdigit()``, or\n ``c.isnumeric()``.\n\nstr.isalpha()\n\n Return true if all characters in the string are alphabetic and\n there is at least one character, false otherwise. Alphabetic\n characters are those characters defined in the Unicode character\n database as "Letter", i.e., those with general category property\n being one of "Lm", "Lt", "Lu", "Ll", or "Lo". Note that this is\n different from the "Alphabetic" property defined in the Unicode\n Standard.\n\nstr.isdecimal()\n\n Return true if all characters in the string are decimal characters\n and there is at least one character, false otherwise. Decimal\n characters are those from general category "Nd". 
This category\n includes digit characters, and all characters that can be used to\n form decimal-radix numbers, e.g. U+0660, ARABIC-INDIC DIGIT ZERO.\n\nstr.isdigit()\n\n Return true if all characters in the string are digits and there is\n at least one character, false otherwise. Digits include decimal\n characters and digits that need special handling, such as the\n compatibility superscript digits. Formally, a digit is a character\n that has the property value Numeric_Type=Digit or\n Numeric_Type=Decimal.\n\nstr.isidentifier()\n\n Return true if the string is a valid identifier according to the\n language definition, section *Identifiers and keywords*.\n\nstr.islower()\n\n Return true if all cased characters [4] in the string are lowercase\n and there is at least one cased character, false otherwise.\n\nstr.isnumeric()\n\n Return true if all characters in the string are numeric characters,\n and there is at least one character, false otherwise. Numeric\n characters include digit characters, and all characters that have\n the Unicode numeric value property, e.g. U+2155, VULGAR FRACTION\n ONE FIFTH. Formally, numeric characters are those with the\n property value Numeric_Type=Digit, Numeric_Type=Decimal or\n Numeric_Type=Numeric.\n\nstr.isprintable()\n\n Return true if all characters in the string are printable or the\n string is empty, false otherwise. Nonprintable characters are\n those characters defined in the Unicode character database as\n "Other" or "Separator", excepting the ASCII space (0x20) which is\n considered printable. (Note that printable characters in this\n context are those which should not be escaped when ``repr()`` is\n invoked on a string. It has no bearing on the handling of strings\n written to ``sys.stdout`` or ``sys.stderr``.)\n\nstr.isspace()\n\n Return true if there are only whitespace characters in the string\n and there is at least one character, false otherwise. 
Whitespace\n characters are those characters defined in the Unicode character\n database as "Other" or "Separator" and those with bidirectional\n property being one of "WS", "B", or "S".\n\nstr.istitle()\n\n Return true if the string is a titlecased string and there is at\n least one character, for example uppercase characters may only\n follow uncased characters and lowercase characters only cased ones.\n Return false otherwise.\n\nstr.isupper()\n\n Return true if all cased characters [4] in the string are uppercase\n and there is at least one cased character, false otherwise.\n\nstr.join(iterable)\n\n Return a string which is the concatenation of the strings in the\n *iterable* *iterable*. A ``TypeError`` will be raised if there are\n any non-string values in *iterable*, including ``bytes`` objects.\n The separator between elements is the string providing this method.\n\nstr.ljust(width[, fillchar])\n\n Return the string left justified in a string of length *width*.\n Padding is done using the specified *fillchar* (default is a\n space). The original string is returned if *width* is less than or\n equal to ``len(s)``.\n\nstr.lower()\n\n Return a copy of the string with all the cased characters [4]\n converted to lowercase.\n\nstr.lstrip([chars])\n\n Return a copy of the string with leading characters removed. The\n *chars* argument is a string specifying the set of characters to be\n removed. If omitted or ``None``, the *chars* argument defaults to\n removing whitespace. 
The *chars* argument is not a prefix; rather,\n all combinations of its values are stripped:\n\n >>> \' spacious \'.lstrip()\n \'spacious \'\n >>> \'www.example.com\'.lstrip(\'cmowz.\')\n \'example.com\'\n\nstatic str.maketrans(x[, y[, z]])\n\n This static method returns a translation table usable for\n ``str.translate()``.\n\n If there is only one argument, it must be a dictionary mapping\n Unicode ordinals (integers) or characters (strings of length 1) to\n Unicode ordinals, strings (of arbitrary lengths) or None.\n Character keys will then be converted to ordinals.\n\n If there are two arguments, they must be strings of equal length,\n and in the resulting dictionary, each character in x will be mapped\n to the character at the same position in y. If there is a third\n argument, it must be a string, whose characters will be mapped to\n None in the result.\n\nstr.partition(sep)\n\n Split the string at the first occurrence of *sep*, and return a\n 3-tuple containing the part before the separator, the separator\n itself, and the part after the separator. If the separator is not\n found, return a 3-tuple containing the string itself, followed by\n two empty strings.\n\nstr.replace(old, new[, count])\n\n Return a copy of the string with all occurrences of substring *old*\n replaced by *new*. If the optional argument *count* is given, only\n the first *count* occurrences are replaced.\n\nstr.rfind(sub[, start[, end]])\n\n Return the highest index in the string where substring *sub* is\n found, such that *sub* is contained within ``s[start:end]``.\n Optional arguments *start* and *end* are interpreted as in slice\n notation. Return ``-1`` on failure.\n\nstr.rindex(sub[, start[, end]])\n\n Like ``rfind()`` but raises ``ValueError`` when the substring *sub*\n is not found.\n\nstr.rjust(width[, fillchar])\n\n Return the string right justified in a string of length *width*.\n Padding is done using the specified *fillchar* (default is a\n space). 
The original string is returned if *width* is less than or\n equal to ``len(s)``.\n\nstr.rpartition(sep)\n\n Split the string at the last occurrence of *sep*, and return a\n 3-tuple containing the part before the separator, the separator\n itself, and the part after the separator. If the separator is not\n found, return a 3-tuple containing two empty strings, followed by\n the string itself.\n\nstr.rsplit([sep[, maxsplit]])\n\n Return a list of the words in the string, using *sep* as the\n delimiter string. If *maxsplit* is given, at most *maxsplit* splits\n are done, the *rightmost* ones. If *sep* is not specified or\n ``None``, any whitespace string is a separator. Except for\n splitting from the right, ``rsplit()`` behaves like ``split()``\n which is described in detail below.\n\nstr.rstrip([chars])\n\n Return a copy of the string with trailing characters removed. The\n *chars* argument is a string specifying the set of characters to be\n removed. If omitted or ``None``, the *chars* argument defaults to\n removing whitespace. The *chars* argument is not a suffix; rather,\n all combinations of its values are stripped:\n\n >>> \' spacious \'.rstrip()\n \' spacious\'\n >>> \'mississippi\'.rstrip(\'ipz\')\n \'mississ\'\n\nstr.split([sep[, maxsplit]])\n\n Return a list of the words in the string, using *sep* as the\n delimiter string. If *maxsplit* is given, at most *maxsplit*\n splits are done (thus, the list will have at most ``maxsplit+1``\n elements). If *maxsplit* is not specified, then there is no limit\n on the number of splits (all possible splits are made).\n\n If *sep* is given, consecutive delimiters are not grouped together\n and are deemed to delimit empty strings (for example,\n ``\'1,,2\'.split(\',\')`` returns ``[\'1\', \'\', \'2\']``). The *sep*\n argument may consist of multiple characters (for example,\n ``\'1<>2<>3\'.split(\'<>\')`` returns ``[\'1\', \'2\', \'3\']``). 
Splitting\n an empty string with a specified separator returns ``[\'\']``.\n\n If *sep* is not specified or is ``None``, a different splitting\n algorithm is applied: runs of consecutive whitespace are regarded\n as a single separator, and the result will contain no empty strings\n at the start or end if the string has leading or trailing\n whitespace. Consequently, splitting an empty string or a string\n consisting of just whitespace with a ``None`` separator returns\n ``[]``.\n\n For example, ``\' 1 2 3 \'.split()`` returns ``[\'1\', \'2\', \'3\']``,\n and ``\' 1 2 3 \'.split(None, 1)`` returns ``[\'1\', \'2 3 \']``.\n\nstr.splitlines([keepends])\n\n Return a list of the lines in the string, breaking at line\n boundaries. Line breaks are not included in the resulting list\n unless *keepends* is given and true.\n\nstr.startswith(prefix[, start[, end]])\n\n Return ``True`` if string starts with the *prefix*, otherwise\n return ``False``. *prefix* can also be a tuple of prefixes to look\n for. With optional *start*, test string beginning at that\n position. With optional *end*, stop comparing string at that\n position.\n\nstr.strip([chars])\n\n Return a copy of the string with the leading and trailing\n characters removed. The *chars* argument is a string specifying the\n set of characters to be removed. If omitted or ``None``, the\n *chars* argument defaults to removing whitespace. The *chars*\n argument is not a prefix or suffix; rather, all combinations of its\n values are stripped:\n\n >>> \' spacious \'.strip()\n \'spacious\'\n >>> \'www.example.com\'.strip(\'cmowz.\')\n \'example\'\n\nstr.swapcase()\n\n Return a copy of the string with uppercase characters converted to\n lowercase and vice versa.\n\nstr.title()\n\n Return a titlecased version of the string where words start with an\n uppercase character and the remaining characters are lowercase.\n\n The algorithm uses a simple language-independent definition of a\n word as groups of consecutive letters. 
The definition works in\n many contexts but it means that apostrophes in contractions and\n possessives form word boundaries, which may not be the desired\n result:\n\n >>> "they\'re bill\'s friends from the UK".title()\n "They\'Re Bill\'S Friends From The Uk"\n\n A workaround for apostrophes can be constructed using regular\n expressions:\n\n >>> import re\n >>> def titlecase(s):\n return re.sub(r"[A-Za-z]+(\'[A-Za-z]+)?",\n lambda mo: mo.group(0)[0].upper() +\n mo.group(0)[1:].lower(),\n s)\n\n >>> titlecase("they\'re bill\'s friends.")\n "They\'re Bill\'s Friends."\n\nstr.translate(map)\n\n Return a copy of the *s* where all characters have been mapped\n through the *map* which must be a dictionary of Unicode ordinals\n (integers) to Unicode ordinals, strings or ``None``. Unmapped\n characters are left untouched. Characters mapped to ``None`` are\n deleted.\n\n You can use ``str.maketrans()`` to create a translation map from\n character-to-character mappings in different formats.\n\n Note: An even more flexible approach is to create a custom character\n mapping codec using the ``codecs`` module (see\n ``encodings.cp1251`` for an example).\n\nstr.upper()\n\n Return a copy of the string with all the cased characters [4]\n converted to uppercase. Note that ``str.upper().isupper()`` might\n be ``False`` if ``s`` contains uncased characters or if the Unicode\n category of the resulting character(s) is not "Lu" (Letter,\n uppercase), but e.g. "Lt" (Letter, titlecase).\n\nstr.zfill(width)\n\n Return the numeric string left filled with zeros in a string of\n length *width*. A sign prefix is handled correctly. The original\n string is returned if *width* is less than or equal to ``len(s)``.\n\n\nOld String Formatting Operations\n================================\n\nNote: The formatting operations described here are obsolete and may go\n away in future versions of Python. 
Use the new *String Formatting*\n in new code.\n\nString objects have one unique built-in operation: the ``%`` operator\n(modulo). This is also known as the string *formatting* or\n*interpolation* operator. Given ``format % values`` (where *format* is\na string), ``%`` conversion specifications in *format* are replaced\nwith zero or more elements of *values*. The effect is similar to\nusing ``sprintf()`` in the C language.\n\nIf *format* requires a single argument, *values* may be a single non-\ntuple object. [5] Otherwise, *values* must be a tuple with exactly\nthe number of items specified by the format string, or a single\nmapping object (for example, a dictionary).\n\nA conversion specifier contains two or more characters and has the\nfollowing components, which must occur in this order:\n\n1. The ``\'%\'`` character, which marks the start of the specifier.\n\n2. Mapping key (optional), consisting of a parenthesised sequence of\n characters (for example, ``(somename)``).\n\n3. Conversion flags (optional), which affect the result of some\n conversion types.\n\n4. Minimum field width (optional). If specified as an ``\'*\'``\n (asterisk), the actual width is read from the next element of the\n tuple in *values*, and the object to convert comes after the\n minimum field width and optional precision.\n\n5. Precision (optional), given as a ``\'.\'`` (dot) followed by the\n precision. If specified as ``\'*\'`` (an asterisk), the actual\n precision is read from the next element of the tuple in *values*,\n and the value to convert comes after the precision.\n\n6. Length modifier (optional).\n\n7. 
Conversion type.\n\nWhen the right argument is a dictionary (or other mapping type), then\nthe formats in the string *must* include a parenthesised mapping key\ninto that dictionary inserted immediately after the ``\'%\'`` character.\nThe mapping key selects the value to be formatted from the mapping.\nFor example:\n\n>>> print(\'%(language)s has %(number)03d quote types.\' %\n... {\'language\': "Python", "number": 2})\nPython has 002 quote types.\n\nIn this case no ``*`` specifiers may occur in a format (since they\nrequire a sequential parameter list).\n\nThe conversion flag characters are:\n\n+-----------+-----------------------------------------------------------------------+\n| Flag | Meaning |\n+===========+=======================================================================+\n| ``\'#\'`` | The value conversion will use the "alternate form" (where defined |\n| | below). |\n+-----------+-----------------------------------------------------------------------+\n| ``\'0\'`` | The conversion will be zero padded for numeric values. |\n+-----------+-----------------------------------------------------------------------+\n| ``\'-\'`` | The converted value is left adjusted (overrides the ``\'0\'`` |\n| | conversion if both are given). |\n+-----------+-----------------------------------------------------------------------+\n| ``\' \'`` | (a space) A blank should be left before a positive number (or empty |\n| | string) produced by a signed conversion. |\n+-----------+-----------------------------------------------------------------------+\n| ``\'+\'`` | A sign character (``\'+\'`` or ``\'-\'``) will precede the conversion |\n| | (overrides a "space" flag). |\n+-----------+-----------------------------------------------------------------------+\n\nA length modifier (``h``, ``l``, or ``L``) may be present, but is\nignored as it is not necessary for Python -- so e.g. 
``%ld`` is\nidentical to ``%d``.\n\nThe conversion types are:\n\n+--------------+-------------------------------------------------------+---------+\n| Conversion | Meaning | Notes |\n+==============+=======================================================+=========+\n| ``\'d\'`` | Signed integer decimal. | |\n+--------------+-------------------------------------------------------+---------+\n| ``\'i\'`` | Signed integer decimal. | |\n+--------------+-------------------------------------------------------+---------+\n| ``\'o\'`` | Signed octal value. | (1) |\n+--------------+-------------------------------------------------------+---------+\n| ``\'u\'`` | Obsolete type -- it is identical to ``\'d\'``. | (7) |\n+--------------+-------------------------------------------------------+---------+\n| ``\'x\'`` | Signed hexadecimal (lowercase). | (2) |\n+--------------+-------------------------------------------------------+---------+\n| ``\'X\'`` | Signed hexadecimal (uppercase). | (2) |\n+--------------+-------------------------------------------------------+---------+\n| ``\'e\'`` | Floating point exponential format (lowercase). | (3) |\n+--------------+-------------------------------------------------------+---------+\n| ``\'E\'`` | Floating point exponential format (uppercase). | (3) |\n+--------------+-------------------------------------------------------+---------+\n| ``\'f\'`` | Floating point decimal format. | (3) |\n+--------------+-------------------------------------------------------+---------+\n| ``\'F\'`` | Floating point decimal format. | (3) |\n+--------------+-------------------------------------------------------+---------+\n| ``\'g\'`` | Floating point format. Uses lowercase exponential | (4) |\n| | format if exponent is less than -4 or not less than | |\n| | precision, decimal format otherwise. | |\n+--------------+-------------------------------------------------------+---------+\n| ``\'G\'`` | Floating point format. 
Uses uppercase exponential | (4) |\n| | format if exponent is less than -4 or not less than | |\n| | precision, decimal format otherwise. | |\n+--------------+-------------------------------------------------------+---------+\n| ``\'c\'`` | Single character (accepts integer or single character | |\n| | string). | |\n+--------------+-------------------------------------------------------+---------+\n| ``\'r\'`` | String (converts any Python object using ``repr()``). | (5) |\n+--------------+-------------------------------------------------------+---------+\n| ``\'s\'`` | String (converts any Python object using ``str()``). | (5) |\n+--------------+-------------------------------------------------------+---------+\n| ``\'a\'`` | String (converts any Python object using | (5) |\n| | ``ascii()``). | |\n+--------------+-------------------------------------------------------+---------+\n| ``\'%\'`` | No argument is converted, results in a ``\'%\'`` | |\n| | character in the result. | |\n+--------------+-------------------------------------------------------+---------+\n\nNotes:\n\n1. The alternate form causes a leading zero (``\'0\'``) to be inserted\n between left-hand padding and the formatting of the number if the\n leading character of the result is not already a zero.\n\n2. The alternate form causes a leading ``\'0x\'`` or ``\'0X\'`` (depending\n on whether the ``\'x\'`` or ``\'X\'`` format was used) to be inserted\n between left-hand padding and the formatting of the number if the\n leading character of the result is not already a zero.\n\n3. The alternate form causes the result to always contain a decimal\n point, even if no digits follow it.\n\n The precision determines the number of digits after the decimal\n point and defaults to 6.\n\n4. 
The alternate form causes the result to always contain a decimal\n point, and trailing zeroes are not removed as they would otherwise\n be.\n\n The precision determines the number of significant digits before\n and after the decimal point and defaults to 6.\n\n5. If precision is ``N``, the output is truncated to ``N`` characters.\n\n7. See **PEP 237**.\n\nSince Python strings have an explicit length, ``%s`` conversions do\nnot assume that ``\'\\0\'`` is the end of the string.\n\nChanged in version 3.1: ``%f`` conversions for numbers whose absolute\nvalue is over 1e50 are no longer replaced by ``%g`` conversions.\n\nAdditional string operations are defined in standard modules\n``string`` and ``re``.\n\n\nRange Type\n==========\n\nThe ``range`` type is an immutable sequence which is commonly used for\nlooping. The advantage of the ``range`` type is that a ``range``\nobject will always take the same amount of memory, no matter the size\nof the range it represents.\n\nRange objects have relatively little behavior: they support indexing,\ncontains, iteration, the ``len()`` function, and the following\nmethods:\n\nrange.count(x)\n\n Return the number of *i*\'s for which ``s[i] == x``.\n\n New in version 3.2.\n\nrange.index(x)\n\n Return the smallest *i* such that ``s[i] == x``. Raises\n ``ValueError`` when *x* is not in the range.\n\n New in version 3.2.\n\n\nMutable Sequence Types\n======================\n\nList and bytearray objects support additional operations that allow\nin-place modification of the object. Other mutable sequence types\n(when added to the language) should also support these operations.\nStrings and tuples are immutable sequence types: such objects cannot\nbe modified once created. 
The following operations are defined on\nmutable sequence types (where *x* is an arbitrary object).\n\nNote that while lists allow their items to be of any type, bytearray\nobject "items" are all integers in the range 0 <= x < 256.\n\n+--------------------------------+----------------------------------+-----------------------+\n| Operation | Result | Notes |\n+================================+==================================+=======================+\n| ``s[i] = x`` | item *i* of *s* is replaced by | |\n| | *x* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s[i:j] = t`` | slice of *s* from *i* to *j* is | |\n| | replaced by the contents of the | |\n| | iterable *t* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``del s[i:j]`` | same as ``s[i:j] = []`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s[i:j:k] = t`` | the elements of ``s[i:j:k]`` are | (1) |\n| | replaced by those of *t* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``del s[i:j:k]`` | removes the elements of | |\n| | ``s[i:j:k]`` from the list | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.append(x)`` | same as ``s[len(s):len(s)] = | |\n| | [x]`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.extend(x)`` | same as ``s[len(s):len(s)] = x`` | (2) |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.count(x)`` | return number of *i*\'s for which | |\n| | ``s[i] == x`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.index(x[, i[, j]])`` | return smallest *k* such that | (3) |\n| | ``s[k] == x`` and ``i <= k < j`` | 
|\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.insert(i, x)`` | same as ``s[i:i] = [x]`` | (4) |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.pop([i])`` | same as ``x = s[i]; del s[i]; | (5) |\n| | return x`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.remove(x)`` | same as ``del s[s.index(x)]`` | (3) |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.reverse()`` | reverses the items of *s* in | (6) |\n| | place | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.sort([key[, reverse]])`` | sort the items of *s* in place | (6), (7), (8) |\n+--------------------------------+----------------------------------+-----------------------+\n\nNotes:\n\n1. *t* must have the same length as the slice it is replacing.\n\n2. *x* can be any iterable object.\n\n3. Raises ``ValueError`` when *x* is not found in *s*. When a negative\n index is passed as the second or third parameter to the ``index()``\n method, the sequence length is added, as for slice indices. If it\n is still negative, it is truncated to zero, as for slice indices.\n\n4. When a negative index is passed as the first parameter to the\n ``insert()`` method, the sequence length is added, as for slice\n indices. If it is still negative, it is truncated to zero, as for\n slice indices.\n\n5. The optional argument *i* defaults to ``-1``, so that by default\n the last item is removed and returned.\n\n6. The ``sort()`` and ``reverse()`` methods modify the sequence in\n place for economy of space when sorting or reversing a large\n sequence. To remind you that they operate by side effect, they\n don\'t return the sorted or reversed sequence.\n\n7. The ``sort()`` method takes optional arguments for controlling the\n comparisons. 
Each must be specified as a keyword argument.\n\n *key* specifies a function of one argument that is used to extract\n a comparison key from each list element: ``key=str.lower``. The\n default value is ``None``. Use ``functools.cmp_to_key()`` to\n convert an old-style *cmp* function to a *key* function.\n\n *reverse* is a boolean value. If set to ``True``, then the list\n elements are sorted as if each comparison were reversed.\n\n The ``sort()`` method is guaranteed to be stable. A sort is stable\n if it guarantees not to change the relative order of elements that\n compare equal --- this is helpful for sorting in multiple passes\n (for example, sort by department, then by salary grade).\n\n **CPython implementation detail:** While a list is being sorted,\n the effect of attempting to mutate, or even inspect, the list is\n undefined. The C implementation of Python makes the list appear\n empty for the duration, and raises ``ValueError`` if it can detect\n that the list has been mutated during a sort.\n\n8. ``sort()`` is not supported by ``bytearray`` objects.\n\n\nBytes and Byte Array Methods\n============================\n\nBytes and bytearray objects, being "strings of bytes", have all\nmethods found on strings, with the exception of ``encode()``,\n``format()`` and ``isidentifier()``, which do not make sense with\nthese types. For converting the objects to strings, they have a\n``decode()`` method.\n\nWherever one of these methods needs to interpret the bytes as\ncharacters (e.g. the ``is...()`` methods), the ASCII character set is\nassumed.\n\nNote: The methods on bytes and bytearray objects don\'t accept strings as\n their arguments, just as the methods on strings don\'t accept bytes\n as their arguments. 
For example, you have to write\n\n a = "abc"\n b = a.replace("a", "f")\n\n and\n\n a = b"abc"\n b = a.replace(b"a", b"f")\n\nbytes.decode(encoding="utf-8", errors="strict")\nbytearray.decode(encoding="utf-8", errors="strict")\n\n Return a string decoded from the given bytes. Default encoding is\n ``\'utf-8\'``. *errors* may be given to set a different error\n handling scheme. The default for *errors* is ``\'strict\'``, meaning\n that encoding errors raise a ``UnicodeError``. Other possible\n values are ``\'ignore\'``, ``\'replace\'`` and any other name\n registered via ``codecs.register_error()``, see section *Codec Base\n Classes*. For a list of possible encodings, see section *Standard\n Encodings*.\n\n Changed in version 3.1: Added support for keyword arguments.\n\nThe bytes and bytearray types have an additional class method:\n\nclassmethod bytes.fromhex(string)\nclassmethod bytearray.fromhex(string)\n\n This ``bytes`` class method returns a bytes or bytearray object,\n decoding the given string object. 
The string must contain two\n hexadecimal digits per byte, spaces are ignored.\n\n >>> bytes.fromhex(\'f0 f1f2 \')\n b\'\\xf0\\xf1\\xf2\'\n\nThe maketrans and translate methods differ in semantics from the\nversions available on strings:\n\nbytes.translate(table[, delete])\nbytearray.translate(table[, delete])\n\n Return a copy of the bytes or bytearray object where all bytes\n occurring in the optional argument *delete* are removed, and the\n remaining bytes have been mapped through the given translation\n table, which must be a bytes object of length 256.\n\n You can use the ``bytes.maketrans()`` method to create a\n translation table.\n\n Set the *table* argument to ``None`` for translations that only\n delete characters:\n\n >>> b\'read this short text\'.translate(None, b\'aeiou\')\n b\'rd ths shrt txt\'\n\nstatic bytes.maketrans(from, to)\nstatic bytearray.maketrans(from, to)\n\n This static method returns a translation table usable for\n ``bytes.translate()`` that will map each character in *from* into\n the character at the same position in *to*; *from* and *to* must be\n bytes objects and have the same length.\n\n New in version 3.1.\n',
- 'typesseq-mutable': '\nMutable Sequence Types\n**********************\n\nList and bytearray objects support additional operations that allow\nin-place modification of the object. Other mutable sequence types\n(when added to the language) should also support these operations.\nStrings and tuples are immutable sequence types: such objects cannot\nbe modified once created. The following operations are defined on\nmutable sequence types (where *x* is an arbitrary object).\n\nNote that while lists allow their items to be of any type, bytearray\nobject "items" are all integers in the range 0 <= x < 256.\n\n+--------------------------------+----------------------------------+-----------------------+\n| Operation | Result | Notes |\n+================================+==================================+=======================+\n| ``s[i] = x`` | item *i* of *s* is replaced by | |\n| | *x* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s[i:j] = t`` | slice of *s* from *i* to *j* is | |\n| | replaced by the contents of the | |\n| | iterable *t* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``del s[i:j]`` | same as ``s[i:j] = []`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s[i:j:k] = t`` | the elements of ``s[i:j:k]`` are | (1) |\n| | replaced by those of *t* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``del s[i:j:k]`` | removes the elements of | |\n| | ``s[i:j:k]`` from the list | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.append(x)`` | same as ``s[len(s):len(s)] = | |\n| | [x]`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.extend(x)`` | same as ``s[len(s):len(s)] = x`` | (2) 
|\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.count(x)`` | return number of *i*\'s for which | |\n| | ``s[i] == x`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.index(x[, i[, j]])`` | return smallest *k* such that | (3) |\n| | ``s[k] == x`` and ``i <= k < j`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.insert(i, x)`` | same as ``s[i:i] = [x]`` | (4) |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.pop([i])`` | same as ``x = s[i]; del s[i]; | (5) |\n| | return x`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.remove(x)`` | same as ``del s[s.index(x)]`` | (3) |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.reverse()`` | reverses the items of *s* in | (6) |\n| | place | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.sort([key[, reverse]])`` | sort the items of *s* in place | (6), (7), (8) |\n+--------------------------------+----------------------------------+-----------------------+\n\nNotes:\n\n1. *t* must have the same length as the slice it is replacing.\n\n2. *x* can be any iterable object.\n\n3. Raises ``ValueError`` when *x* is not found in *s*. When a negative\n index is passed as the second or third parameter to the ``index()``\n method, the sequence length is added, as for slice indices. If it\n is still negative, it is truncated to zero, as for slice indices.\n\n4. When a negative index is passed as the first parameter to the\n ``insert()`` method, the sequence length is added, as for slice\n indices. If it is still negative, it is truncated to zero, as for\n slice indices.\n\n5. 
The optional argument *i* defaults to ``-1``, so that by default\n the last item is removed and returned.\n\n6. The ``sort()`` and ``reverse()`` methods modify the sequence in\n place for economy of space when sorting or reversing a large\n sequence. To remind you that they operate by side effect, they\n don\'t return the sorted or reversed sequence.\n\n7. The ``sort()`` method takes optional arguments for controlling the\n comparisons. Each must be specified as a keyword argument.\n\n *key* specifies a function of one argument that is used to extract\n a comparison key from each list element: ``key=str.lower``. The\n default value is ``None``. Use ``functools.cmp_to_key()`` to\n convert an old-style *cmp* function to a *key* function.\n\n *reverse* is a boolean value. If set to ``True``, then the list\n elements are sorted as if each comparison were reversed.\n\n The ``sort()`` method is guaranteed to be stable. A sort is stable\n if it guarantees not to change the relative order of elements that\n compare equal --- this is helpful for sorting in multiple passes\n (for example, sort by department, then by salary grade).\n\n **CPython implementation detail:** While a list is being sorted,\n the effect of attempting to mutate, or even inspect, the list is\n undefined. The C implementation of Python makes the list appear\n empty for the duration, and raises ``ValueError`` if it can detect\n that the list has been mutated during a sort.\n\n8. ``sort()`` is not supported by ``bytearray`` objects.\n',
+ 'typesseq': '\nSequence Types --- ``list``, ``tuple``, ``range``\n*************************************************\n\nThere are three basic sequence types: lists, tuples, and range\nobjects. Additional sequence types tailored for processing of *binary\ndata* and *text strings* are described in dedicated sections.\n\n\nCommon Sequence Operations\n==========================\n\nThe operations in the following table are supported by most sequence\ntypes, both mutable and immutable. The ``collections.abc.Sequence``\nABC is provided to make it easier to correctly implement these\noperations on custom sequence types.\n\nThis table lists the sequence operations sorted in ascending priority\n(operations in the same box have the same priority). In the table,\n*s* and *t* are sequences of the same type, *n*, *i*, *j* and *k* are\nintegers and *x* is an arbitrary object that meets any type and value\nrestrictions imposed by *s*.\n\nThe ``in`` and ``not in`` operations have the same priorities as the\ncomparison operations. 
The ``+`` (concatenation) and ``*``\n(repetition) operations have the same priority as the corresponding\nnumeric operations.\n\n+----------------------------+----------------------------------+------------+\n| Operation | Result | Notes |\n+============================+==================================+============+\n| ``x in s`` | ``True`` if an item of *s* is | (1) |\n| | equal to *x*, else ``False`` | |\n+----------------------------+----------------------------------+------------+\n| ``x not in s`` | ``False`` if an item of *s* is | (1) |\n| | equal to *x*, else ``True`` | |\n+----------------------------+----------------------------------+------------+\n| ``s + t`` | the concatenation of *s* and *t* | (6)(7) |\n+----------------------------+----------------------------------+------------+\n| ``s * n`` or ``n * s`` | *n* shallow copies of *s* | (2)(7) |\n| | concatenated | |\n+----------------------------+----------------------------------+------------+\n| ``s[i]`` | *i*th item of *s*, origin 0 | (3) |\n+----------------------------+----------------------------------+------------+\n| ``s[i:j]`` | slice of *s* from *i* to *j* | (3)(4) |\n+----------------------------+----------------------------------+------------+\n| ``s[i:j:k]`` | slice of *s* from *i* to *j* | (3)(5) |\n| | with step *k* | |\n+----------------------------+----------------------------------+------------+\n| ``len(s)`` | length of *s* | |\n+----------------------------+----------------------------------+------------+\n| ``min(s)`` | smallest item of *s* | |\n+----------------------------+----------------------------------+------------+\n| ``max(s)`` | largest item of *s* | |\n+----------------------------+----------------------------------+------------+\n| ``s.index(x[, i[, j]])`` | index of the first occurrence of | (8) |\n| | *x* in *s* (at or after index | |\n| | *i* and before index *j*) | |\n+----------------------------+----------------------------------+------------+\n| ``s.count(x)`` | 
total number of occurrences of | |\n| | *x* in *s* | |\n+----------------------------+----------------------------------+------------+\n\nSequences of the same type also support comparisons. In particular,\ntuples and lists are compared lexicographically by comparing\ncorresponding elements. This means that to compare equal, every\nelement must compare equal and the two sequences must be of the same\ntype and have the same length. (For full details see *Comparisons* in\nthe language reference.)\n\nNotes:\n\n1. While the ``in`` and ``not in`` operations are used only for simple\n containment testing in the general case, some specialised sequences\n (such as ``str``, ``bytes`` and ``bytearray``) also use them for\n subsequence testing:\n\n >>> "gg" in "eggs"\n True\n\n2. Values of *n* less than ``0`` are treated as ``0`` (which yields an\n empty sequence of the same type as *s*). Note also that the copies\n are shallow; nested structures are not copied. This often haunts\n new Python programmers; consider:\n\n >>> lists = [[]] * 3\n >>> lists\n [[], [], []]\n >>> lists[0].append(3)\n >>> lists\n [[3], [3], [3]]\n\n What has happened is that ``[[]]`` is a one-element list containing\n an empty list, so all three elements of ``[[]] * 3`` are (pointers\n to) this single empty list. Modifying any of the elements of\n ``lists`` modifies this single list. You can create a list of\n different lists this way:\n\n >>> lists = [[] for i in range(3)]\n >>> lists[0].append(3)\n >>> lists[1].append(5)\n >>> lists[2].append(7)\n >>> lists\n [[3], [5], [7]]\n\n3. If *i* or *j* is negative, the index is relative to the end of the\n string: ``len(s) + i`` or ``len(s) + j`` is substituted. But note\n that ``-0`` is still ``0``.\n\n4. The slice of *s* from *i* to *j* is defined as the sequence of\n items with index *k* such that ``i <= k < j``. If *i* or *j* is\n greater than ``len(s)``, use ``len(s)``. If *i* is omitted or\n ``None``, use ``0``. 
If *j* is omitted or ``None``, use\n ``len(s)``. If *i* is greater than or equal to *j*, the slice is\n empty.\n\n5. The slice of *s* from *i* to *j* with step *k* is defined as the\n sequence of items with index ``x = i + n*k`` such that ``0 <= n <\n (j-i)/k``. In other words, the indices are ``i``, ``i+k``,\n ``i+2*k``, ``i+3*k`` and so on, stopping when *j* is reached (but\n never including *j*). If *i* or *j* is greater than ``len(s)``,\n use ``len(s)``. If *i* or *j* are omitted or ``None``, they become\n "end" values (which end depends on the sign of *k*). Note, *k*\n cannot be zero. If *k* is ``None``, it is treated like ``1``.\n\n6. Concatenating immutable sequences always results in a new object.\n This means that building up a sequence by repeated concatenation\n will have a quadratic runtime cost in the total sequence length.\n To get a linear runtime cost, you must switch to one of the\n alternatives below:\n\n * if concatenating ``str`` objects, you can build a list and use\n ``str.join()`` at the end or else write to a ``io.StringIO``\n instance and retrieve its value when complete\n\n * if concatenating ``bytes`` objects, you can similarly use\n ``bytes.join()`` or ``io.BytesIO``, or you can do in-place\n concatenation with a ``bytearray`` object. ``bytearray`` objects\n are mutable and have an efficient overallocation mechanism\n\n * if concatenating ``tuple`` objects, extend a ``list`` instead\n\n * for other types, investigate the relevant class documentation\n\n7. Some sequence types (such as ``range``) only support item sequences\n that follow specific patterns, and hence don\'t support sequence\n concatenation or repetition.\n\n8. ``index`` raises ``ValueError`` when *x* is not found in *s*. When\n supported, the additional arguments to the index method allow\n efficient searching of subsections of the sequence. 
Passing the\n extra arguments is roughly equivalent to using ``s[i:j].index(x)``,\n only without copying any data and with the returned index being\n relative to the start of the sequence rather than the start of the\n slice.\n\n\nImmutable Sequence Types\n========================\n\nThe only operation that immutable sequence types generally implement\nthat is not also implemented by mutable sequence types is support for\nthe ``hash()`` built-in.\n\nThis support allows immutable sequences, such as ``tuple`` instances,\nto be used as ``dict`` keys and stored in ``set`` and ``frozenset``\ninstances.\n\nAttempting to hash an immutable sequence that contains unhashable\nvalues will result in ``TypeError``.\n\n\nMutable Sequence Types\n======================\n\nThe operations in the following table are defined on mutable sequence\ntypes. The ``collections.abc.MutableSequence`` ABC is provided to make\nit easier to correctly implement these operations on custom sequence\ntypes.\n\nIn the table *s* is an instance of a mutable sequence type, *t* is any\niterable object and *x* is an arbitrary object that meets any type and\nvalue restrictions imposed by *s* (for example, ``bytearray`` only\naccepts integers that meet the value restriction ``0 <= x <= 255``).\n\n+--------------------------------+----------------------------------+-----------------------+\n| Operation | Result | Notes |\n+================================+==================================+=======================+\n| ``s[i] = x`` | item *i* of *s* is replaced by | |\n| | *x* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s[i:j] = t`` | slice of *s* from *i* to *j* is | |\n| | replaced by the contents of the | |\n| | iterable *t* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``del s[i:j]`` | same as ``s[i:j] = []`` | 
|\n+--------------------------------+----------------------------------+-----------------------+\n| ``s[i:j:k] = t`` | the elements of ``s[i:j:k]`` are | (1) |\n| | replaced by those of *t* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``del s[i:j:k]`` | removes the elements of | |\n| | ``s[i:j:k]`` from the list | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.append(x)`` | appends *x* to the end of the | |\n| | sequence (same as | |\n| | ``s[len(s):len(s)] = [x]``) | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.clear()`` | removes all items from ``s`` | (5) |\n| | (same as ``del s[:]``) | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.copy()`` | creates a shallow copy of ``s`` | (5) |\n| | (same as ``s[:]``) | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.extend(t)`` | extends *s* with the contents of | |\n| | *t* (same as ``s[len(s):len(s)] | |\n| | = t``) | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.insert(i, x)`` | inserts *x* into *s* at the | |\n| | index given by *i* (same as | |\n| | ``s[i:i] = [x]``) | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.pop([i])`` | retrieves the item at *i* and | (2) |\n| | also removes it from *s* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.remove(x)`` | remove the first item from *s* | (3) |\n| | where ``s[i] == x`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.reverse()`` | reverses the items of *s* in | (4) |\n| | place | 
|\n+--------------------------------+----------------------------------+-----------------------+\n\nNotes:\n\n1. *t* must have the same length as the slice it is replacing.\n\n2. The optional argument *i* defaults to ``-1``, so that by default\n the last item is removed and returned.\n\n3. ``remove`` raises ``ValueError`` when *x* is not found in *s*.\n\n4. The ``reverse()`` method modifies the sequence in place for economy\n of space when reversing a large sequence. To remind users that it\n operates by side effect, it does not return the reversed sequence.\n\n5. ``clear()`` and ``copy()`` are included for consistency with the\n interfaces of mutable containers that don\'t support slicing\n operations (such as ``dict`` and ``set``)\n\n New in version 3.3: ``clear()`` and ``copy()`` methods.\n\n\nLists\n=====\n\nLists are mutable sequences, typically used to store collections of\nhomogeneous items (where the precise degree of similarity will vary by\napplication).\n\nclass class list([iterable])\n\n Lists may be constructed in several ways:\n\n * Using a pair of square brackets to denote the empty list: ``[]``\n\n * Using square brackets, separating items with commas: ``[a]``,\n ``[a, b, c]``\n\n * Using a list comprehension: ``[x for x in iterable]``\n\n * Using the type constructor: ``list()`` or ``list(iterable)``\n\n The constructor builds a list whose items are the same and in the\n same order as *iterable*\'s items. *iterable* may be either a\n sequence, a container that supports iteration, or an iterator\n object. If *iterable* is already a list, a copy is made and\n returned, similar to ``iterable[:]``. For example, ``list(\'abc\')``\n returns ``[\'a\', \'b\', \'c\']`` and ``list( (1, 2, 3) )`` returns ``[1,\n 2, 3]``. If no argument is given, the constructor creates a new\n empty list, ``[]``.\n\n Many other operations also produce lists, including the\n ``sorted()`` built-in.\n\n Lists implement all of the *common* and *mutable* sequence\n operations. 
Lists also provide the following additional method:\n\n sort(*, key=None, reverse=None)\n\n This method sorts the list in place, using only ``<``\n comparisons between items. Exceptions are not suppressed - if\n any comparison operations fail, the entire sort operation will\n fail (and the list will likely be left in a partially modified\n state).\n\n *key* specifies a function of one argument that is used to\n extract a comparison key from each list element (for example,\n ``key=str.lower``). The key corresponding to each item in the\n list is calculated once and then used for the entire sorting\n process. The default value of ``None`` means that list items are\n sorted directly without calculating a separate key value.\n\n The ``functools.cmp_to_key()`` utility is available to convert a\n 2.x style *cmp* function to a *key* function.\n\n *reverse* is a boolean value. If set to ``True``, then the list\n elements are sorted as if each comparison were reversed.\n\n This method modifies the sequence in place for economy of space\n when sorting a large sequence. To remind users that it operates\n by side effect, it does not return the sorted sequence (use\n ``sorted()`` to explicitly request a new sorted list instance).\n\n The ``sort()`` method is guaranteed to be stable. A sort is\n stable if it guarantees not to change the relative order of\n elements that compare equal --- this is helpful for sorting in\n multiple passes (for example, sort by department, then by salary\n grade).\n\n **CPython implementation detail:** While a list is being sorted,\n the effect of attempting to mutate, or even inspect, the list is\n undefined. 
The C implementation of Python makes the list appear\n empty for the duration, and raises ``ValueError`` if it can\n detect that the list has been mutated during a sort.\n\n\nTuples\n======\n\nTuples are immutable sequences, typically used to store collections of\nheterogeneous data (such as the 2-tuples produced by the\n``enumerate()`` built-in). Tuples are also used for cases where an\nimmutable sequence of homogeneous data is needed (such as allowing\nstorage in a ``set`` or ``dict`` instance).\n\nclass class tuple([iterable])\n\n Tuples may be constructed in a number of ways:\n\n * Using a pair of parentheses to denote the empty tuple: ``()``\n\n * Using a trailing comma for a singleton tuple: ``a,`` or ``(a,)``\n\n * Separating items with commas: ``a, b, c`` or ``(a, b, c)``\n\n * Using the ``tuple()`` built-in: ``tuple()`` or\n ``tuple(iterable)``\n\n The constructor builds a tuple whose items are the same and in the\n same order as *iterable*\'s items. *iterable* may be either a\n sequence, a container that supports iteration, or an iterator\n object. If *iterable* is already a tuple, it is returned\n unchanged. For example, ``tuple(\'abc\')`` returns ``(\'a\', \'b\',\n \'c\')`` and ``tuple( [1, 2, 3] )`` returns ``(1, 2, 3)``. If no\n argument is given, the constructor creates a new empty tuple,\n ``()``.\n\n Note that it is actually the comma which makes a tuple, not the\n parentheses. 
The parentheses are optional, except in the empty\n tuple case, or when they are needed to avoid syntactic ambiguity.\n For example, ``f(a, b, c)`` is a function call with three\n arguments, while ``f((a, b, c))`` is a function call with a 3-tuple\n as the sole argument.\n\n Tuples implement all of the *common* sequence operations.\n\nFor heterogeneous collections of data where access by name is clearer\nthan access by index, ``collections.namedtuple()`` may be a more\nappropriate choice than a simple tuple object.\n\n\nRanges\n======\n\nThe ``range`` type represents an immutable sequence of numbers and is\ncommonly used for looping a specific number of times in ``for`` loops.\n\nclass class range([start], stop[, step])\n\n The arguments to the range constructor must be integers (either\n built-in ``int`` or any object that implements the ``__index__``\n special method). If the *step* argument is omitted, it defaults to\n ``1``. If the *start* argument is omitted, it defaults to ``0``. If\n *step* is zero, ``ValueError`` is raised.\n\n For a positive *step*, the contents of a range ``r`` are determined\n by the formula ``r[i] = start + step*i`` where ``i >= 0`` and\n ``r[i] < stop``.\n\n For a negative *step*, the contents of the range are still\n determined by the formula ``r[i] = start + step*i``, but the\n constraints are ``i >= 0`` and ``r[i] > stop``.\n\n A range object will be empty if ``r[0]`` does not meet the value\n constraint. 
Ranges do support negative indices, but these are\n interpreted as indexing from the end of the sequence determined by\n the positive indices.\n\n Ranges containing absolute values larger than ``sys.maxsize`` are\n permitted but some features (such as ``len()``) may raise\n ``OverflowError``.\n\n Range examples:\n\n >>> list(range(10))\n [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n >>> list(range(1, 11))\n [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n >>> list(range(0, 30, 5))\n [0, 5, 10, 15, 20, 25]\n >>> list(range(0, 10, 3))\n [0, 3, 6, 9]\n >>> list(range(0, -10, -1))\n [0, -1, -2, -3, -4, -5, -6, -7, -8, -9]\n >>> list(range(0))\n []\n >>> list(range(1, 0))\n []\n\n Ranges implement all of the *common* sequence operations except\n concatenation and repetition (due to the fact that range objects\n can only represent sequences that follow a strict pattern and\n repetition and concatenation will usually violate that pattern).\n\nThe advantage of the ``range`` type over a regular ``list`` or\n``tuple`` is that a ``range`` object will always take the same (small)\namount of memory, no matter the size of the range it represents (as it\nonly stores the ``start``, ``stop`` and ``step`` values, calculating\nindividual items and subranges as needed).\n\nRange objects implement the ``collections.abc.Sequence`` ABC, and provide\nfeatures such as containment tests, element index lookup, slicing and\nsupport for negative indices (see *Sequence Types --- list, tuple,\nrange*):\n\n>>> r = range(0, 20, 2)\n>>> r\nrange(0, 20, 2)\n>>> 11 in r\nFalse\n>>> 10 in r\nTrue\n>>> r.index(10)\n5\n>>> r[5]\n10\n>>> r[:5]\nrange(0, 10, 2)\n>>> r[-1]\n18\n\nTesting range objects for equality with ``==`` and ``!=`` compares\nthem as sequences. That is, two range objects are considered equal if\nthey represent the same sequence of values. 
(Note that two range\nobjects that compare equal might have different ``start``, ``stop``\nand ``step`` attributes, for example ``range(0) == range(2, 1, 3)`` or\n``range(0, 3, 2) == range(0, 4, 2)``.)\n\nChanged in version 3.2: Implement the Sequence ABC. Support slicing\nand negative indices. Test ``int`` objects for membership in constant\ntime instead of iterating through all items.\n\nChanged in version 3.3: Define \'==\' and \'!=\' to compare range objects\nbased on the sequence of values they define (instead of comparing\nbased on object identity).\n\nNew in version 3.3: The ``start``, ``stop`` and ``step`` attributes.\n',
+ 'typesseq-mutable': "\nMutable Sequence Types\n**********************\n\nThe operations in the following table are defined on mutable sequence\ntypes. The ``collections.abc.MutableSequence`` ABC is provided to make\nit easier to correctly implement these operations on custom sequence\ntypes.\n\nIn the table *s* is an instance of a mutable sequence type, *t* is any\niterable object and *x* is an arbitrary object that meets any type and\nvalue restrictions imposed by *s* (for example, ``bytearray`` only\naccepts integers that meet the value restriction ``0 <= x <= 255``).\n\n+--------------------------------+----------------------------------+-----------------------+\n| Operation | Result | Notes |\n+================================+==================================+=======================+\n| ``s[i] = x`` | item *i* of *s* is replaced by | |\n| | *x* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s[i:j] = t`` | slice of *s* from *i* to *j* is | |\n| | replaced by the contents of the | |\n| | iterable *t* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``del s[i:j]`` | same as ``s[i:j] = []`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s[i:j:k] = t`` | the elements of ``s[i:j:k]`` are | (1) |\n| | replaced by those of *t* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``del s[i:j:k]`` | removes the elements of | |\n| | ``s[i:j:k]`` from the list | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.append(x)`` | appends *x* to the end of the | |\n| | sequence (same as | |\n| | ``s[len(s):len(s)] = [x]``) | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.clear()`` | removes all items from ``s`` | (5) |\n| | (same as ``del s[:]``) | 
|\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.copy()`` | creates a shallow copy of ``s`` | (5) |\n| | (same as ``s[:]``) | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.extend(t)`` | extends *s* with the contents of | |\n| | *t* (same as ``s[len(s):len(s)] | |\n| | = t``) | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.insert(i, x)`` | inserts *x* into *s* at the | |\n| | index given by *i* (same as | |\n| | ``s[i:i] = [x]``) | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.pop([i])`` | retrieves the item at *i* and | (2) |\n| | also removes it from *s* | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.remove(x)`` | remove the first item from *s* | (3) |\n| | where ``s[i] == x`` | |\n+--------------------------------+----------------------------------+-----------------------+\n| ``s.reverse()`` | reverses the items of *s* in | (4) |\n| | place | |\n+--------------------------------+----------------------------------+-----------------------+\n\nNotes:\n\n1. *t* must have the same length as the slice it is replacing.\n\n2. The optional argument *i* defaults to ``-1``, so that by default\n the last item is removed and returned.\n\n3. ``remove`` raises ``ValueError`` when *x* is not found in *s*.\n\n4. The ``reverse()`` method modifies the sequence in place for economy\n of space when reversing a large sequence. To remind users that it\n operates by side effect, it does not return the reversed sequence.\n\n5. ``clear()`` and ``copy()`` are included for consistency with the\n interfaces of mutable containers that don't support slicing\n operations (such as ``dict`` and ``set``)\n\n New in version 3.3: ``clear()`` and ``copy()`` methods.\n",
'unary': '\nUnary arithmetic and bitwise operations\n***************************************\n\nAll unary arithmetic and bitwise operations have the same priority:\n\n u_expr ::= power | "-" u_expr | "+" u_expr | "~" u_expr\n\nThe unary ``-`` (minus) operator yields the negation of its numeric\nargument.\n\nThe unary ``+`` (plus) operator yields its numeric argument unchanged.\n\nThe unary ``~`` (invert) operator yields the bitwise inversion of its\ninteger argument. The bitwise inversion of ``x`` is defined as\n``-(x+1)``. It only applies to integral numbers.\n\nIn all three cases, if the argument does not have the proper type, a\n``TypeError`` exception is raised.\n',
'while': '\nThe ``while`` statement\n***********************\n\nThe ``while`` statement is used for repeated execution as long as an\nexpression is true:\n\n while_stmt ::= "while" expression ":" suite\n ["else" ":" suite]\n\nThis repeatedly tests the expression and, if it is true, executes the\nfirst suite; if the expression is false (which may be the first time\nit is tested) the suite of the ``else`` clause, if present, is\nexecuted and the loop terminates.\n\nA ``break`` statement executed in the first suite terminates the loop\nwithout executing the ``else`` clause\'s suite. A ``continue``\nstatement executed in the first suite skips the rest of the suite and\ngoes back to testing the expression.\n',
'with': '\nThe ``with`` statement\n**********************\n\nThe ``with`` statement is used to wrap the execution of a block with\nmethods defined by a context manager (see section *With Statement\nContext Managers*). This allows common\n``try``...``except``...``finally`` usage patterns to be encapsulated\nfor convenient reuse.\n\n with_stmt ::= "with" with_item ("," with_item)* ":" suite\n with_item ::= expression ["as" target]\n\nThe execution of the ``with`` statement with one "item" proceeds as\nfollows:\n\n1. The context expression (the expression given in the ``with_item``)\n is evaluated to obtain a context manager.\n\n2. The context manager\'s ``__exit__()`` is loaded for later use.\n\n3. The context manager\'s ``__enter__()`` method is invoked.\n\n4. If a target was included in the ``with`` statement, the return\n value from ``__enter__()`` is assigned to it.\n\n Note: The ``with`` statement guarantees that if the ``__enter__()``\n method returns without an error, then ``__exit__()`` will always\n be called. Thus, if an error occurs during the assignment to the\n target list, it will be treated the same as an error occurring\n within the suite would be. See step 6 below.\n\n5. The suite is executed.\n\n6. The context manager\'s ``__exit__()`` method is invoked. If an\n exception caused the suite to be exited, its type, value, and\n traceback are passed as arguments to ``__exit__()``. Otherwise,\n three ``None`` arguments are supplied.\n\n If the suite was exited due to an exception, and the return value\n from the ``__exit__()`` method was false, the exception is\n reraised. 
If the return value was true, the exception is\n suppressed, and execution continues with the statement following\n the ``with`` statement.\n\n If the suite was exited for any reason other than an exception, the\n return value from ``__exit__()`` is ignored, and execution proceeds\n at the normal location for the kind of exit that was taken.\n\nWith more than one item, the context managers are processed as if\nmultiple ``with`` statements were nested:\n\n with A() as a, B() as b:\n suite\n\nis equivalent to\n\n with A() as a:\n with B() as b:\n suite\n\nChanged in version 3.1: Support for multiple context expressions.\n\nSee also:\n\n **PEP 0343** - The "with" statement\n The specification, background, and examples for the Python\n ``with`` statement.\n',
- 'yield': '\nThe ``yield`` statement\n***********************\n\n yield_stmt ::= yield_expression\n\nThe ``yield`` statement is only used when defining a generator\nfunction, and is only used in the body of the generator function.\nUsing a ``yield`` statement in a function definition is sufficient to\ncause that definition to create a generator function instead of a\nnormal function. When a generator function is called, it returns an\niterator known as a generator iterator, or more commonly, a generator.\nThe body of the generator function is executed by calling the\n``next()`` function on the generator repeatedly until it raises an\nexception.\n\nWhen a ``yield`` statement is executed, the state of the generator is\nfrozen and the value of ``expression_list`` is returned to\n``next()``\'s caller. By "frozen" we mean that all local state is\nretained, including the current bindings of local variables, the\ninstruction pointer, and the internal evaluation stack: enough\ninformation is saved so that the next time ``next()`` is invoked, the\nfunction can proceed exactly as if the ``yield`` statement were just\nanother external call.\n\nThe ``yield`` statement is allowed in the ``try`` clause of a ``try``\n... ``finally`` construct. If the generator is not resumed before it\nis finalized (by reaching a zero reference count or by being garbage\ncollected), the generator-iterator\'s ``close()`` method will be\ncalled, allowing any pending ``finally`` clauses to execute.\n\nSee also:\n\n **PEP 0255** - Simple Generators\n The proposal for adding generators and the ``yield`` statement\n to Python.\n\n **PEP 0342** - Coroutines via Enhanced Generators\n The proposal that, among other generator enhancements, proposed\n allowing ``yield`` to appear inside a ``try`` ... ``finally``\n block.\n'}
+ 'yield': '\nThe ``yield`` statement\n***********************\n\n yield_stmt ::= yield_expression\n\nThe ``yield`` statement is only used when defining a generator\nfunction, and is only used in the body of the generator function.\nUsing a ``yield`` statement in a function definition is sufficient to\ncause that definition to create a generator function instead of a\nnormal function.\n\nWhen a generator function is called, it returns an iterator known as a\ngenerator iterator, or more commonly, a generator. The body of the\ngenerator function is executed by calling the ``next()`` function on\nthe generator repeatedly until it raises an exception.\n\nWhen a ``yield`` statement is executed, the state of the generator is\nfrozen and the value of ``expression_list`` is returned to\n``next()``\'s caller. By "frozen" we mean that all local state is\nretained, including the current bindings of local variables, the\ninstruction pointer, and the internal evaluation stack: enough\ninformation is saved so that the next time ``next()`` is invoked, the\nfunction can proceed exactly as if the ``yield`` statement were just\nanother external call.\n\nThe ``yield`` statement is allowed in the ``try`` clause of a ``try``\n... ``finally`` construct. 
If the generator is not resumed before it\nis finalized (by reaching a zero reference count or by being garbage\ncollected), the generator-iterator\'s ``close()`` method will be\ncalled, allowing any pending ``finally`` clauses to execute.\n\nWhen ``yield from <expr>`` is used, it treats the supplied expression\nas a subiterator, producing values from it until the underlying\niterator is exhausted.\n\n Changed in version 3.3: Added ``yield from <expr>`` to delegate\n control flow to a subiterator.\n\nFor full details of ``yield`` semantics, refer to the *Yield\nexpressions* section.\n\nSee also:\n\n **PEP 0255** - Simple Generators\n The proposal for adding generators and the ``yield`` statement\n to Python.\n\n **PEP 0342** - Coroutines via Enhanced Generators\n The proposal to enhance the API and syntax of generators, making\n them usable as simple coroutines.\n\n **PEP 0380** - Syntax for Delegating to a Subgenerator\n The proposal to introduce the ``yield from`` syntax, making\n delegation to sub-generators easy.\n'}
diff --git a/Lib/queue.py b/Lib/queue.py
index bee7ed4..c3296fe 100644
--- a/Lib/queue.py
+++ b/Lib/queue.py
@@ -1,49 +1,57 @@
-"""A multi-producer, multi-consumer queue."""
+'''A multi-producer, multi-consumer queue.'''
-from time import time as _time
try:
- import threading as _threading
+ import threading
except ImportError:
- import dummy_threading as _threading
+ import dummy_threading as threading
from collections import deque
-import heapq
+from heapq import heappush, heappop
+try:
+ from time import monotonic as time
+except ImportError:
+ from time import time
__all__ = ['Empty', 'Full', 'Queue', 'PriorityQueue', 'LifoQueue']
class Empty(Exception):
- "Exception raised by Queue.get(block=0)/get_nowait()."
+ 'Exception raised by Queue.get(block=0)/get_nowait().'
pass
class Full(Exception):
- "Exception raised by Queue.put(block=0)/put_nowait()."
+ 'Exception raised by Queue.put(block=0)/put_nowait().'
pass
class Queue:
- """Create a queue object with a given maximum size.
+ '''Create a queue object with a given maximum size.
If maxsize is <= 0, the queue size is infinite.
- """
+ '''
+
def __init__(self, maxsize=0):
self.maxsize = maxsize
self._init(maxsize)
+
# mutex must be held whenever the queue is mutating. All methods
# that acquire mutex must release it before returning. mutex
# is shared between the three conditions, so acquiring and
# releasing the conditions also acquires and releases mutex.
- self.mutex = _threading.Lock()
+ self.mutex = threading.Lock()
+
# Notify not_empty whenever an item is added to the queue; a
# thread waiting to get is notified then.
- self.not_empty = _threading.Condition(self.mutex)
+ self.not_empty = threading.Condition(self.mutex)
+
# Notify not_full whenever an item is removed from the queue;
# a thread waiting to put is notified then.
- self.not_full = _threading.Condition(self.mutex)
+ self.not_full = threading.Condition(self.mutex)
+
# Notify all_tasks_done whenever the number of unfinished tasks
# drops to zero; thread waiting to join() is notified to resume
- self.all_tasks_done = _threading.Condition(self.mutex)
+ self.all_tasks_done = threading.Condition(self.mutex)
self.unfinished_tasks = 0
def task_done(self):
- """Indicate that a formerly enqueued task is complete.
+ '''Indicate that a formerly enqueued task is complete.
Used by Queue consumer threads. For each get() used to fetch a task,
a subsequent call to task_done() tells the queue that the processing
@@ -55,43 +63,35 @@ class Queue:
Raises a ValueError if called more times than there were items
placed in the queue.
- """
- self.all_tasks_done.acquire()
- try:
+ '''
+ with self.all_tasks_done:
unfinished = self.unfinished_tasks - 1
if unfinished <= 0:
if unfinished < 0:
raise ValueError('task_done() called too many times')
self.all_tasks_done.notify_all()
self.unfinished_tasks = unfinished
- finally:
- self.all_tasks_done.release()
def join(self):
- """Blocks until all items in the Queue have been gotten and processed.
+ '''Blocks until all items in the Queue have been gotten and processed.
The count of unfinished tasks goes up whenever an item is added to the
queue. The count goes down whenever a consumer thread calls task_done()
to indicate the item was retrieved and all work on it is complete.
When the count of unfinished tasks drops to zero, join() unblocks.
- """
- self.all_tasks_done.acquire()
- try:
+ '''
+ with self.all_tasks_done:
while self.unfinished_tasks:
self.all_tasks_done.wait()
- finally:
- self.all_tasks_done.release()
def qsize(self):
- """Return the approximate size of the queue (not reliable!)."""
- self.mutex.acquire()
- n = self._qsize()
- self.mutex.release()
- return n
+ '''Return the approximate size of the queue (not reliable!).'''
+ with self.mutex:
+ return self._qsize()
def empty(self):
- """Return True if the queue is empty, False otherwise (not reliable!).
+ '''Return True if the queue is empty, False otherwise (not reliable!).
This method is likely to be removed at some point. Use qsize() == 0
as a direct substitute, but be aware that either approach risks a race
@@ -100,29 +100,23 @@ class Queue:
To create code that needs to wait for all queued tasks to be
completed, the preferred technique is to use the join() method.
-
- """
- self.mutex.acquire()
- n = not self._qsize()
- self.mutex.release()
- return n
+ '''
+ with self.mutex:
+ return not self._qsize()
def full(self):
- """Return True if the queue is full, False otherwise (not reliable!).
+ '''Return True if the queue is full, False otherwise (not reliable!).
This method is likely to be removed at some point. Use qsize() >= n
as a direct substitute, but be aware that either approach risks a race
condition where a queue can shrink before the result of full() or
qsize() can be used.
-
- """
- self.mutex.acquire()
- n = 0 < self.maxsize <= self._qsize()
- self.mutex.release()
- return n
+ '''
+ with self.mutex:
+ return 0 < self.maxsize <= self._qsize()
def put(self, item, block=True, timeout=None):
- """Put an item into the queue.
+ '''Put an item into the queue.
If optional args 'block' is true and 'timeout' is None (the default),
block if necessary until a free slot is available. If 'timeout' is
@@ -131,9 +125,8 @@ class Queue:
Otherwise ('block' is false), put an item on the queue if a free slot
is immediately available, else raise the Full exception ('timeout'
is ignored in that case).
- """
- self.not_full.acquire()
- try:
+ '''
+ with self.not_full:
if self.maxsize > 0:
if not block:
if self._qsize() >= self.maxsize:
@@ -144,28 +137,18 @@ class Queue:
elif timeout < 0:
raise ValueError("'timeout' must be a positive number")
else:
- endtime = _time() + timeout
+ endtime = time() + timeout
while self._qsize() >= self.maxsize:
- remaining = endtime - _time()
+ remaining = endtime - time()
if remaining <= 0.0:
raise Full
self.not_full.wait(remaining)
self._put(item)
self.unfinished_tasks += 1
self.not_empty.notify()
- finally:
- self.not_full.release()
-
- def put_nowait(self, item):
- """Put an item into the queue without blocking.
-
- Only enqueue the item if a free slot is immediately available.
- Otherwise raise the Full exception.
- """
- return self.put(item, False)
def get(self, block=True, timeout=None):
- """Remove and return an item from the queue.
+ '''Remove and return an item from the queue.
If optional args 'block' is true and 'timeout' is None (the default),
block if necessary until an item is available. If 'timeout' is
@@ -174,9 +157,8 @@ class Queue:
Otherwise ('block' is false), return an item if one is immediately
available, else raise the Empty exception ('timeout' is ignored
in that case).
- """
- self.not_empty.acquire()
- try:
+ '''
+ with self.not_empty:
if not block:
if not self._qsize():
raise Empty
@@ -186,25 +168,31 @@ class Queue:
elif timeout < 0:
raise ValueError("'timeout' must be a positive number")
else:
- endtime = _time() + timeout
+ endtime = time() + timeout
while not self._qsize():
- remaining = endtime - _time()
+ remaining = endtime - time()
if remaining <= 0.0:
raise Empty
self.not_empty.wait(remaining)
item = self._get()
self.not_full.notify()
return item
- finally:
- self.not_empty.release()
+
+ def put_nowait(self, item):
+ '''Put an item into the queue without blocking.
+
+ Only enqueue the item if a free slot is immediately available.
+ Otherwise raise the Full exception.
+ '''
+ return self.put(item, block=False)
def get_nowait(self):
- """Remove and return an item from the queue without blocking.
+ '''Remove and return an item from the queue without blocking.
Only get an item if one is immediately available. Otherwise
raise the Empty exception.
- """
- return self.get(False)
+ '''
+ return self.get(block=False)
# Override these methods to implement other queue organizations
# (e.g. stack or priority queue).
@@ -214,7 +202,7 @@ class Queue:
def _init(self, maxsize):
self.queue = deque()
- def _qsize(self, len=len):
+ def _qsize(self):
return len(self.queue)
# Put a new item in the queue
@@ -235,13 +223,13 @@ class PriorityQueue(Queue):
def _init(self, maxsize):
self.queue = []
- def _qsize(self, len=len):
+ def _qsize(self):
return len(self.queue)
- def _put(self, item, heappush=heapq.heappush):
+ def _put(self, item):
heappush(self.queue, item)
- def _get(self, heappop=heapq.heappop):
+ def _get(self):
return heappop(self.queue)
@@ -251,7 +239,7 @@ class LifoQueue(Queue):
def _init(self, maxsize):
self.queue = []
- def _qsize(self, len=len):
+ def _qsize(self):
return len(self.queue)
def _put(self, item):
diff --git a/Lib/random.py b/Lib/random.py
index 9b61208..6388f29 100644
--- a/Lib/random.py
+++ b/Lib/random.py
@@ -41,7 +41,7 @@ from types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethod
from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil
from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
from os import urandom as _urandom
-from collections import Set as _Set, Sequence as _Sequence
+from collections.abc import Set as _Set, Sequence as _Sequence
from hashlib import sha512 as _sha512
__all__ = ["Random","seed","random","uniform","randint","choice","sample",
diff --git a/Lib/re.py b/Lib/re.py
index 85c5a57..9ae5174 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -215,7 +215,7 @@ def compile(pattern, flags=0):
def purge():
"Clear the regular expression caches"
- _compile_typed.cache_clear()
+ _compile.cache_clear()
_compile_repl.cache_clear()
def template(pattern, flags=0):
@@ -223,12 +223,14 @@ def template(pattern, flags=0):
return _compile(pattern, flags|T)
_alphanum_str = frozenset(
- "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
+ "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
_alphanum_bytes = frozenset(
- b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
+ b"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
def escape(pattern):
- "Escape all non-alphanumeric characters in pattern."
+ """
+ Escape all the characters in pattern except ASCII letters, numbers and '_'.
+ """
if isinstance(pattern, str):
alphanum = _alphanum_str
s = list(pattern)
@@ -259,11 +261,8 @@ def escape(pattern):
_pattern_type = type(sre_compile.compile("", 0))
+@functools.lru_cache(maxsize=500, typed=True)
def _compile(pattern, flags):
- return _compile_typed(type(pattern), pattern, flags)
-
-@functools.lru_cache(maxsize=500)
-def _compile_typed(text_bytes_type, pattern, flags):
# internal: compile pattern
if isinstance(pattern, _pattern_type):
if flags:
diff --git a/Lib/runpy.py b/Lib/runpy.py
index 7cb4668..39c0e9f 100644
--- a/Lib/runpy.py
+++ b/Lib/runpy.py
@@ -9,13 +9,12 @@ importers when locating support scripts as well as when importing modules.
# Written by Nick Coghlan <ncoghlan at gmail.com>
# to implement PEP 338 (Executing Modules as Scripts)
+
+import os
import sys
+import importlib.machinery # importlib first so we can test #15386 via -m
import imp
-from pkgutil import read_code
-try:
- from imp import get_loader
-except ImportError:
- from pkgutil import get_loader
+from pkgutil import read_code, get_loader, get_importer
__all__ = [
"run_module", "run_path",
@@ -95,7 +94,7 @@ def _get_filename(loader, mod_name):
for attr in ("get_filename", "_get_filename"):
meth = getattr(loader, attr, None)
if meth is not None:
- return meth(mod_name)
+ return os.path.abspath(meth(mod_name))
return None
# Helper to get the loader, code and filename for a module
@@ -181,47 +180,23 @@ def run_module(mod_name, init_globals=None,
def _get_main_module_details():
# Helper that gives a nicer error message when attempting to
# execute a zipfile or directory by invoking __main__.py
+ # Also moves the standard __main__ out of the way so that the
+ # preexisting __loader__ entry doesn't cause issues
main_name = "__main__"
+ saved_main = sys.modules[main_name]
+ del sys.modules[main_name]
try:
return _get_module_details(main_name)
except ImportError as exc:
if main_name in str(exc):
raise ImportError("can't find %r module in %r" %
- (main_name, sys.path[0]))
+ (main_name, sys.path[0])) from exc
raise
+ finally:
+ sys.modules[main_name] = saved_main
-# XXX (ncoghlan): Perhaps expose the C API function
-# as imp.get_importer instead of reimplementing it in Python?
-def _get_importer(path_name):
- """Python version of PyImport_GetImporter C API function"""
- cache = sys.path_importer_cache
- try:
- importer = cache[path_name]
- except KeyError:
- # Not yet cached. Flag as using the
- # standard machinery until we finish
- # checking the hooks
- cache[path_name] = None
- for hook in sys.path_hooks:
- try:
- importer = hook(path_name)
- break
- except ImportError:
- pass
- else:
- # The following check looks a bit odd. The trick is that
- # NullImporter throws ImportError if the supplied path is a
- # *valid* directory entry (and hence able to be handled
- # by the standard import machinery)
- try:
- importer = imp.NullImporter(path_name)
- except ImportError:
- return None
- cache[path_name] = importer
- return importer
-
-def _get_code_from_file(fname):
+def _get_code_from_file(run_name, fname):
# Check for a compiled file first
with open(fname, "rb") as f:
code = read_code(f)
@@ -229,7 +204,10 @@ def _get_code_from_file(fname):
# That didn't work, so try it as normal source code
with open(fname, "rb") as f:
code = compile(f.read(), fname, 'exec')
- return code
+ loader = importlib.machinery.SourceFileLoader(run_name, fname)
+ else:
+ loader = importlib.machinery.SourcelessFileLoader(run_name, fname)
+ return code, loader
def run_path(path_name, init_globals=None, run_name=None):
"""Execute code located at the specified filesystem location
@@ -244,13 +222,13 @@ def run_path(path_name, init_globals=None, run_name=None):
if run_name is None:
run_name = "<run_path>"
pkg_name = run_name.rpartition(".")[0]
- importer = _get_importer(path_name)
- if isinstance(importer, imp.NullImporter):
+ importer = get_importer(path_name)
+ if isinstance(importer, (type(None), imp.NullImporter)):
# Not a valid sys.path entry, so run the code directly
# execfile() doesn't help as we want to allow compiled files
- code = _get_code_from_file(path_name)
+ code, mod_loader = _get_code_from_file(run_name, path_name)
return _run_module_code(code, init_globals, run_name, path_name,
- pkg_name=pkg_name)
+ mod_loader, pkg_name)
else:
# Importer is defined for path, so add it to
# the start of sys.path
@@ -262,13 +240,7 @@ def run_path(path_name, init_globals=None, run_name=None):
# have no choice and we have to remove it even while we read the
# code. If we don't do this, a __loader__ attribute in the
# existing __main__ module may prevent location of the new module.
- main_name = "__main__"
- saved_main = sys.modules[main_name]
- del sys.modules[main_name]
- try:
- mod_name, loader, code, fname = _get_main_module_details()
- finally:
- sys.modules[main_name] = saved_main
+ mod_name, loader, code, fname = _get_main_module_details()
with _TempModule(run_name) as temp_module, \
_ModifiedArgv0(path_name):
mod_globals = temp_module.module.__dict__
diff --git a/Lib/sched.py b/Lib/sched.py
index a119892..5551f71 100644
--- a/Lib/sched.py
+++ b/Lib/sched.py
@@ -28,12 +28,21 @@ has another way to reference private data (besides global variables).
# XXX instead of having to define a module or class just to hold
# XXX the global state of your particular time and delay functions.
+import time
import heapq
from collections import namedtuple
+try:
+ import threading
+except ImportError:
+ import dummy_threading as threading
+try:
+ from time import monotonic as _time
+except ImportError:
+ from time import time as _time
__all__ = ["scheduler"]
-class Event(namedtuple('Event', 'time, priority, action, argument')):
+class Event(namedtuple('Event', 'time, priority, action, argument, kwargs')):
def __eq__(s, o): return (s.time, s.priority) == (o.time, o.priority)
def __ne__(s, o): return (s.time, s.priority) != (o.time, o.priority)
def __lt__(s, o): return (s.time, s.priority) < (o.time, o.priority)
@@ -42,32 +51,36 @@ class Event(namedtuple('Event', 'time, priority, action, argument')):
def __ge__(s, o): return (s.time, s.priority) >= (o.time, o.priority)
class scheduler:
- def __init__(self, timefunc, delayfunc):
+
+ def __init__(self, timefunc=_time, delayfunc=time.sleep):
"""Initialize a new instance, passing the time and delay
functions"""
self._queue = []
+ self._lock = threading.RLock()
self.timefunc = timefunc
self.delayfunc = delayfunc
- def enterabs(self, time, priority, action, argument):
+ def enterabs(self, time, priority, action, argument=[], kwargs={}):
"""Enter a new event in the queue at an absolute time.
Returns an ID for the event which can be used to remove it,
if necessary.
"""
- event = Event(time, priority, action, argument)
- heapq.heappush(self._queue, event)
- return event # The ID
+ with self._lock:
+ event = Event(time, priority, action, argument, kwargs)
+ heapq.heappush(self._queue, event)
+ return event # The ID
- def enter(self, delay, priority, action, argument):
+ def enter(self, delay, priority, action, argument=[], kwargs={}):
"""A variant that specifies the time as a relative time.
This is actually the more commonly used interface.
"""
- time = self.timefunc() + delay
- return self.enterabs(time, priority, action, argument)
+ with self._lock:
+ time = self.timefunc() + delay
+ return self.enterabs(time, priority, action, argument, kwargs)
def cancel(self, event):
"""Remove an event from the queue.
@@ -76,15 +89,20 @@ class scheduler:
If the event is not in the queue, this raises ValueError.
"""
- self._queue.remove(event)
- heapq.heapify(self._queue)
+ with self._lock:
+ self._queue.remove(event)
+ heapq.heapify(self._queue)
def empty(self):
"""Check whether the queue is empty."""
- return not self._queue
+ with self._lock:
+ return not self._queue
- def run(self):
+ def run(self, blocking=True):
"""Execute events until the queue is empty.
+ If blocking is False, execute the scheduled events due to
+ expire soonest (if any) and then return the deadline of the
+ next scheduled call in the scheduler.
When there is a positive delay until the first event, the
delay function is called and the event is left in the queue;
@@ -106,24 +124,27 @@ class scheduler:
"""
# localize variable access to minimize overhead
# and to improve thread safety
- q = self._queue
- delayfunc = self.delayfunc
- timefunc = self.timefunc
- pop = heapq.heappop
- while q:
- time, priority, action, argument = checked_event = q[0]
- now = timefunc()
- if now < time:
- delayfunc(time - now)
- else:
- event = pop(q)
- # Verify that the event was not removed or altered
- # by another thread after we last looked at q[0].
- if event is checked_event:
- action(*argument)
- delayfunc(0) # Let other threads run
+ with self._lock:
+ q = self._queue
+ delayfunc = self.delayfunc
+ timefunc = self.timefunc
+ pop = heapq.heappop
+ while q:
+ time, priority, action, argument, kwargs = checked_event = q[0]
+ now = timefunc()
+ if now < time:
+ if not blocking:
+ return time - now
+ delayfunc(time - now)
else:
- heapq.heappush(q, event)
+ event = pop(q)
+ # Verify that the event was not removed or altered
+ # by another thread after we last looked at q[0].
+ if event is checked_event:
+ action(*argument, **kwargs)
+ delayfunc(0) # Let other threads run
+ else:
+ heapq.heappush(q, event)
@property
def queue(self):
@@ -136,5 +157,6 @@ class scheduler:
# Use heapq to sort the queue rather than using 'sorted(self._queue)'.
# With heapq, two events scheduled at the same time will show in
# the actual order they would be retrieved.
- events = self._queue[:]
- return map(heapq.heappop, [events]*len(events))
+ with self._lock:
+ events = self._queue[:]
+ return map(heapq.heappop, [events]*len(events))
diff --git a/Lib/shlex.py b/Lib/shlex.py
index 3edd3db..69f3b45 100644
--- a/Lib/shlex.py
+++ b/Lib/shlex.py
@@ -6,13 +6,14 @@
# Posix compliance, split(), string arguments, and
# iterator interface by Gustavo Niemeyer, April 2003.
-import os.path
+import os
+import re
import sys
from collections import deque
from io import StringIO
-__all__ = ["shlex", "split"]
+__all__ = ["shlex", "split", "quote"]
class shlex:
"A lexical analyzer class for simple shell-like syntaxes."
@@ -274,6 +275,21 @@ def split(s, comments=False, posix=True):
lex.commenters = ''
return list(lex)
+
+_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search
+
+def quote(s):
+ """Return a shell-escaped version of the string *s*."""
+ if not s:
+ return "''"
+ if _find_unsafe(s) is None:
+ return s
+
+ # use single quotes, and put single quotes into double quotes
+ # the string $'b is then quoted as '$'"'"'b'
+ return "'" + s.replace("'", "'\"'\"'") + "'"
+
+
if __name__ == '__main__':
if len(sys.argv) == 1:
lexer = shlex()
diff --git a/Lib/shutil.py b/Lib/shutil.py
index ef29ae2..5dc311e 100644
--- a/Lib/shutil.py
+++ b/Lib/shutil.py
@@ -15,6 +15,7 @@ import tarfile
try:
import bz2
+ del bz2
_BZ2_SUPPORTED = True
except ImportError:
_BZ2_SUPPORTED = False
@@ -34,7 +35,9 @@ __all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
"ExecError", "make_archive", "get_archive_formats",
"register_archive_format", "unregister_archive_format",
"get_unpack_formats", "register_unpack_format",
- "unregister_unpack_format", "unpack_archive", "ignore_patterns"]
+ "unregister_unpack_format", "unpack_archive",
+ "ignore_patterns", "chown", "which"]
+ # disk_usage is added later, if available on the platform
class Error(EnvironmentError):
pass
@@ -79,8 +82,13 @@ def _samefile(src, dst):
return (os.path.normcase(os.path.abspath(src)) ==
os.path.normcase(os.path.abspath(dst)))
-def copyfile(src, dst):
- """Copy data from src to dst"""
+def copyfile(src, dst, *, follow_symlinks=True):
+ """Copy data from src to dst.
+
+ If follow_symlinks is not set and src is a symbolic link, a new
+ symlink will be created instead of copying the file it points to.
+
+ """
if _samefile(src, dst):
raise Error("`%s` and `%s` are the same file" % (src, dst))
@@ -95,56 +103,140 @@ def copyfile(src, dst):
if stat.S_ISFIFO(st.st_mode):
raise SpecialFileError("`%s` is a named pipe" % fn)
- with open(src, 'rb') as fsrc:
- with open(dst, 'wb') as fdst:
- copyfileobj(fsrc, fdst)
+ if not follow_symlinks and os.path.islink(src):
+ os.symlink(os.readlink(src), dst)
+ else:
+ with open(src, 'rb') as fsrc:
+ with open(dst, 'wb') as fdst:
+ copyfileobj(fsrc, fdst)
+ return dst
+
+def copymode(src, dst, *, follow_symlinks=True):
+ """Copy mode bits from src to dst.
+
+ If follow_symlinks is not set, symlinks aren't followed if and only
+ if both `src` and `dst` are symlinks. If `lchmod` isn't available
+ (e.g. Linux) this method does nothing.
+
+ """
+ if not follow_symlinks and os.path.islink(src) and os.path.islink(dst):
+ if hasattr(os, 'lchmod'):
+ stat_func, chmod_func = os.lstat, os.lchmod
+ else:
+ return
+ elif hasattr(os, 'chmod'):
+ stat_func, chmod_func = os.stat, os.chmod
+ else:
+ return
+
+ st = stat_func(src)
+ chmod_func(dst, stat.S_IMODE(st.st_mode))
+
+if hasattr(os, 'listxattr'):
+ def _copyxattr(src, dst, *, follow_symlinks=True):
+ """Copy extended filesystem attributes from `src` to `dst`.
+
+ Overwrite existing attributes.
+
+ If `follow_symlinks` is false, symlinks won't be followed.
-def copymode(src, dst):
- """Copy mode bits from src to dst"""
- if hasattr(os, 'chmod'):
- st = os.stat(src)
- mode = stat.S_IMODE(st.st_mode)
- os.chmod(dst, mode)
+ """
-def copystat(src, dst):
- """Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
- st = os.stat(src)
+ for name in os.listxattr(src, follow_symlinks=follow_symlinks):
+ try:
+ value = os.getxattr(src, name, follow_symlinks=follow_symlinks)
+ os.setxattr(dst, name, value, follow_symlinks=follow_symlinks)
+ except OSError as e:
+ if e.errno not in (errno.EPERM, errno.ENOTSUP, errno.ENODATA):
+ raise
+else:
+ def _copyxattr(*args, **kwargs):
+ pass
+
+def copystat(src, dst, *, follow_symlinks=True):
+ """Copy all stat info (mode bits, atime, mtime, flags) from src to dst.
+
+ If the optional flag `follow_symlinks` is not set, symlinks aren't followed if and
+ only if both `src` and `dst` are symlinks.
+
+ """
+ def _nop(*args, ns=None, follow_symlinks=None):
+ pass
+
+ # follow symlinks (aka don't not follow symlinks)
+ follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst))
+ if follow:
+ # use the real function if it exists
+ def lookup(name):
+ return getattr(os, name, _nop)
+ else:
+ # use the real function only if it exists
+ # *and* it supports follow_symlinks
+ def lookup(name):
+ fn = getattr(os, name, _nop)
+ if fn in os.supports_follow_symlinks:
+ return fn
+ return _nop
+
+ st = lookup("stat")(src, follow_symlinks=follow)
mode = stat.S_IMODE(st.st_mode)
- if hasattr(os, 'utime'):
- os.utime(dst, (st.st_atime, st.st_mtime))
- if hasattr(os, 'chmod'):
- os.chmod(dst, mode)
- if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
+ lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns),
+ follow_symlinks=follow)
+ try:
+ lookup("chmod")(dst, mode, follow_symlinks=follow)
+ except NotImplementedError:
+ # if we got a NotImplementedError, it's because
+ # * follow_symlinks=False,
+ # * lchown() is unavailable, and
+ # * either
+ # * fchownat() is unavailable or
+ # * fchownat() doesn't implement AT_SYMLINK_NOFOLLOW.
+ # (it returned ENOTSUP.)
+ # therefore we're out of options--we simply cannot chown the
+ # symlink. give up, suppress the error.
+ # (which is what shutil always did in this circumstance.)
+ pass
+ if hasattr(st, 'st_flags'):
try:
- os.chflags(dst, st.st_flags)
+ lookup("chflags")(dst, st.st_flags, follow_symlinks=follow)
except OSError as why:
for err in 'EOPNOTSUPP', 'ENOTSUP':
if hasattr(errno, err) and why.errno == getattr(errno, err):
break
else:
raise
+ _copyxattr(src, dst, follow_symlinks=follow)
-def copy(src, dst):
- """Copy data and mode bits ("cp src dst").
+def copy(src, dst, *, follow_symlinks=True):
+ """Copy data and mode bits ("cp src dst"). Return the file's destination.
The destination may be a directory.
+ If follow_symlinks is false, symlinks won't be followed. This
+ resembles GNU's "cp -P src dst".
+
"""
if os.path.isdir(dst):
dst = os.path.join(dst, os.path.basename(src))
- copyfile(src, dst)
- copymode(src, dst)
+ copyfile(src, dst, follow_symlinks=follow_symlinks)
+ copymode(src, dst, follow_symlinks=follow_symlinks)
+ return dst
-def copy2(src, dst):
- """Copy data and all stat info ("cp -p src dst").
+def copy2(src, dst, *, follow_symlinks=True):
+ """Copy data and all stat info ("cp -p src dst"). Return the file's
+ destination.
The destination may be a directory.
+ If follow_symlinks is false, symlinks won't be followed. This
+ resembles GNU's "cp -P src dst".
+
"""
if os.path.isdir(dst):
dst = os.path.join(dst, os.path.basename(src))
- copyfile(src, dst)
- copystat(src, dst)
+ copyfile(src, dst, follow_symlinks=follow_symlinks)
+ copystat(src, dst, follow_symlinks=follow_symlinks)
+ return dst
def ignore_patterns(*patterns):
"""Function that can be used as copytree() ignore parameter.
@@ -211,7 +303,11 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
if os.path.islink(srcname):
linkto = os.readlink(srcname)
if symlinks:
+ # We can't just leave it to `copy_function` because legacy
+ # code with a custom `copy_function` may rely on copytree
+ # doing the right thing.
os.symlink(linkto, dstname)
+ copystat(srcname, dstname, follow_symlinks=not symlinks)
else:
# ignore dangling symlink if the flag is on
if not os.path.exists(linkto) and ignore_dangling_symlinks:
@@ -239,24 +335,10 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
errors.append((src, dst, str(why)))
if errors:
raise Error(errors)
+ return dst
-def rmtree(path, ignore_errors=False, onerror=None):
- """Recursively delete a directory tree.
-
- If ignore_errors is set, errors are ignored; otherwise, if onerror
- is set, it is called to handle the error with arguments (func,
- path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
- path is the argument to that function that caused it to fail; and
- exc_info is a tuple returned by sys.exc_info(). If ignore_errors
- is false and onerror is None, an exception is raised.
-
- """
- if ignore_errors:
- def onerror(*args):
- pass
- elif onerror is None:
- def onerror(*args):
- raise
+# version vulnerable to race conditions
+def _rmtree_unsafe(path, onerror):
try:
if os.path.islink(path):
# symlinks to directories are forbidden, see bug #1669
@@ -268,7 +350,7 @@ def rmtree(path, ignore_errors=False, onerror=None):
names = []
try:
names = os.listdir(path)
- except os.error as err:
+ except os.error:
onerror(os.listdir, path, sys.exc_info())
for name in names:
fullname = os.path.join(path, name)
@@ -277,17 +359,109 @@ def rmtree(path, ignore_errors=False, onerror=None):
except os.error:
mode = 0
if stat.S_ISDIR(mode):
- rmtree(fullname, ignore_errors, onerror)
+ _rmtree_unsafe(fullname, onerror)
else:
try:
- os.remove(fullname)
- except os.error as err:
- onerror(os.remove, fullname, sys.exc_info())
+ os.unlink(fullname)
+ except os.error:
+ onerror(os.unlink, fullname, sys.exc_info())
try:
os.rmdir(path)
except os.error:
onerror(os.rmdir, path, sys.exc_info())
+# Version using fd-based APIs to protect against races
+def _rmtree_safe_fd(topfd, path, onerror):
+ names = []
+ try:
+ names = os.listdir(topfd)
+ except os.error:
+ onerror(os.listdir, path, sys.exc_info())
+ for name in names:
+ fullname = os.path.join(path, name)
+ try:
+ orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
+ mode = orig_st.st_mode
+ except os.error:
+ mode = 0
+ if stat.S_ISDIR(mode):
+ try:
+ dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
+ except os.error:
+ onerror(os.open, fullname, sys.exc_info())
+ else:
+ try:
+ if os.path.samestat(orig_st, os.fstat(dirfd)):
+ _rmtree_safe_fd(dirfd, fullname, onerror)
+ try:
+ os.rmdir(name, dir_fd=topfd)
+ except os.error:
+ onerror(os.rmdir, fullname, sys.exc_info())
+ finally:
+ os.close(dirfd)
+ else:
+ try:
+ os.unlink(name, dir_fd=topfd)
+ except os.error:
+ onerror(os.unlink, fullname, sys.exc_info())
+
+_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
+ os.supports_dir_fd and
+ os.listdir in os.supports_fd and
+ os.stat in os.supports_follow_symlinks)
+
+def rmtree(path, ignore_errors=False, onerror=None):
+ """Recursively delete a directory tree.
+
+ If ignore_errors is set, errors are ignored; otherwise, if onerror
+ is set, it is called to handle the error with arguments (func,
+ path, exc_info) where func is platform and implementation dependent;
+ path is the argument to that function that caused it to fail; and
+ exc_info is a tuple returned by sys.exc_info(). If ignore_errors
+ is false and onerror is None, an exception is raised.
+
+ """
+ if ignore_errors:
+ def onerror(*args):
+ pass
+ elif onerror is None:
+ def onerror(*args):
+ raise
+ if _use_fd_functions:
+ # While the unsafe rmtree works fine on bytes, the fd based does not.
+ if isinstance(path, bytes):
+ path = os.fsdecode(path)
+ # Note: To guard against symlink races, we use the standard
+ # lstat()/open()/fstat() trick.
+ try:
+ orig_st = os.lstat(path)
+ except Exception:
+ onerror(os.lstat, path, sys.exc_info())
+ return
+ try:
+ fd = os.open(path, os.O_RDONLY)
+ except Exception:
+ onerror(os.lstat, path, sys.exc_info())
+ return
+ try:
+ if (stat.S_ISDIR(orig_st.st_mode) and
+ os.path.samestat(orig_st, os.fstat(fd))):
+ _rmtree_safe_fd(fd, path, onerror)
+ try:
+ os.rmdir(path)
+ except os.error:
+ onerror(os.rmdir, path, sys.exc_info())
+ else:
+ raise NotADirectoryError(20,
+ "Not a directory: '{}'".format(path))
+ finally:
+ os.close(fd)
+ else:
+ return _rmtree_unsafe(path, onerror)
+
+# Allow introspection of whether or not the hardening against symlink
+# attacks is supported on the current platform
+rmtree.avoids_symlink_attacks = _use_fd_functions
def _basename(path):
# A basename() variant which first strips the trailing slash, if present.
@@ -296,7 +470,8 @@ def _basename(path):
def move(src, dst):
"""Recursively move a file or directory to another location. This is
- similar to the Unix "mv" command.
+ similar to the Unix "mv" command. Return the file or directory's
+ destination.
If the destination is a directory or a symlink to a directory, the source
is moved inside the directory. The destination path must not already
@@ -306,7 +481,10 @@ def move(src, dst):
overwritten depending on os.rename() semantics.
If the destination is on our current filesystem, then rename() is used.
- Otherwise, src is copied to the destination and then removed.
+ Otherwise, src is copied to the destination and then removed. Symlinks are
+ recreated under the new name if os.rename() fails because of cross
+ filesystem renames.
+
A lot more could be done here... A look at a mv.c shows a lot of
the issues this implementation glosses over.
@@ -324,8 +502,12 @@ def move(src, dst):
raise Error("Destination path '%s' already exists" % real_dst)
try:
os.rename(src, real_dst)
- except OSError as exc:
- if os.path.isdir(src):
+ except OSError:
+ if os.path.islink(src):
+ linkto = os.readlink(src)
+ os.symlink(linkto, real_dst)
+ os.unlink(src)
+ elif os.path.isdir(src):
if _destinsrc(src, dst):
raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
copytree(src, real_dst, symlinks=True)
@@ -333,6 +515,7 @@ def move(src, dst):
else:
copy2(src, real_dst)
os.unlink(src)
+ return real_dst
def _destinsrc(src, dst):
src = abspath(src)
@@ -391,7 +574,7 @@ def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
compress_ext['bzip2'] = '.bz2'
# flags for compression program, each element of list will be an argument
- if compress is not None and compress not in compress_ext.keys():
+ if compress is not None and compress not in compress_ext:
raise ValueError("bad value for 'compress', or compression format not "
"supported : {0}".format(compress))
@@ -496,7 +679,7 @@ def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
_ARCHIVE_FORMATS = {
'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
- 'zip': (_make_zipfile, [],"ZIP file")
+ 'zip': (_make_zipfile, [], "ZIP file")
}
if _BZ2_SUPPORTED:
@@ -529,7 +712,7 @@ def register_archive_format(name, function, extra_args=None, description=''):
if not isinstance(extra_args, (tuple, list)):
raise TypeError('extra_args needs to be a sequence')
for element in extra_args:
- if not isinstance(element, (tuple, list)) or len(element) !=2 :
+ if not isinstance(element, (tuple, list)) or len(element) !=2:
raise TypeError('extra_args elements are : (arg_name, value)')
_ARCHIVE_FORMATS[name] = (function, extra_args, description)
@@ -681,7 +864,7 @@ def _unpack_zipfile(filename, extract_dir):
if not name.endswith('/'):
# file
data = zip.read(info.filename)
- f = open(target,'wb')
+ f = open(target, 'wb')
try:
f.write(data)
finally:
@@ -755,3 +938,165 @@ def unpack_archive(filename, extract_dir=None, format=None):
func = _UNPACK_FORMATS[format][1]
kwargs = dict(_UNPACK_FORMATS[format][2])
func(filename, extract_dir, **kwargs)
+
+
+if hasattr(os, 'statvfs'):
+
+ __all__.append('disk_usage')
+ _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
+
+ def disk_usage(path):
+ """Return disk usage statistics about the given path.
+
+ Returned value is a named tuple with attributes 'total', 'used' and
+ 'free', which are the amount of total, used and free space, in bytes.
+ """
+ st = os.statvfs(path)
+ free = st.f_bavail * st.f_frsize
+ total = st.f_blocks * st.f_frsize
+ used = (st.f_blocks - st.f_bfree) * st.f_frsize
+ return _ntuple_diskusage(total, used, free)
+
+elif os.name == 'nt':
+
+ import nt
+ __all__.append('disk_usage')
+ _ntuple_diskusage = collections.namedtuple('usage', 'total used free')
+
+ def disk_usage(path):
+ """Return disk usage statistics about the given path.
+
+ Returned value is a named tuple with attributes 'total', 'used' and
+ 'free', which are the amount of total, used and free space, in bytes.
+ """
+ total, free = nt._getdiskusage(path)
+ used = total - free
+ return _ntuple_diskusage(total, used, free)
+
+
+def chown(path, user=None, group=None):
+ """Change owner user and group of the given path.
+
+ user and group can be the uid/gid or the user/group names, and in that case,
+ they are converted to their respective uid/gid.
+ """
+
+ if user is None and group is None:
+ raise ValueError("user and/or group must be set")
+
+ _user = user
+ _group = group
+
+ # -1 means don't change it
+ if user is None:
+ _user = -1
+ # user can either be an int (the uid) or a string (the system username)
+ elif isinstance(user, str):
+ _user = _get_uid(user)
+ if _user is None:
+ raise LookupError("no such user: {!r}".format(user))
+
+ if group is None:
+ _group = -1
+ elif not isinstance(group, int):
+ _group = _get_gid(group)
+ if _group is None:
+ raise LookupError("no such group: {!r}".format(group))
+
+ os.chown(path, _user, _group)
+
+def get_terminal_size(fallback=(80, 24)):
+ """Get the size of the terminal window.
+
+ For each of the two dimensions, the environment variable, COLUMNS
+ and LINES respectively, is checked. If the variable is defined and
+ the value is a positive integer, it is used.
+
+ When COLUMNS or LINES is not defined, which is the common case,
+ the terminal connected to sys.__stdout__ is queried
+ by invoking os.get_terminal_size.
+
+ If the terminal size cannot be successfully queried, either because
+ the system doesn't support querying, or because we are not
+ connected to a terminal, the value given in fallback parameter
+ is used. Fallback defaults to (80, 24) which is the default
+ size used by many terminal emulators.
+
+ The value returned is a named tuple of type os.terminal_size.
+ """
+ # columns, lines are the working values
+ try:
+ columns = int(os.environ['COLUMNS'])
+ except (KeyError, ValueError):
+ columns = 0
+
+ try:
+ lines = int(os.environ['LINES'])
+ except (KeyError, ValueError):
+ lines = 0
+
+ # only query if necessary
+ if columns <= 0 or lines <= 0:
+ try:
+ size = os.get_terminal_size(sys.__stdout__.fileno())
+ except (NameError, OSError):
+ size = os.terminal_size(fallback)
+ if columns <= 0:
+ columns = size.columns
+ if lines <= 0:
+ lines = size.lines
+
+ return os.terminal_size((columns, lines))
+
+def which(cmd, mode=os.F_OK | os.X_OK, path=None):
+ """Given a command, mode, and a PATH string, return the path which
+ conforms to the given mode on the PATH, or None if there is no such
+ file.
+
+ `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
+ of os.environ.get("PATH"), or can be overridden with a custom search
+ path.
+
+ """
+ # Check that a given file can be accessed with the correct mode.
+ # Additionally check that `file` is not a directory, as on Windows
+ # directories pass the os.access check.
+ def _access_check(fn, mode):
+ return (os.path.exists(fn) and os.access(fn, mode)
+ and not os.path.isdir(fn))
+
+ # Short circuit. If we're given a full path which matches the mode
+ # and it exists, we're done here.
+ if _access_check(cmd, mode):
+ return cmd
+
+ path = (path or os.environ.get("PATH", os.defpath)).split(os.pathsep)
+
+ if sys.platform == "win32":
+ # The current directory takes precedence on Windows.
+ if not os.curdir in path:
+ path.insert(0, os.curdir)
+
+ # PATHEXT is necessary to check on Windows.
+ pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
+ # See if the given file matches any of the expected path extensions.
+ # This will allow us to short circuit when given "python.exe".
+ matches = [cmd for ext in pathext if cmd.lower().endswith(ext.lower())]
+ # If it does match, only test that one, otherwise we have to try
+ # others.
+ files = [cmd] if matches else [cmd + ext.lower() for ext in pathext]
+ else:
+ # On other platforms you don't have things like PATHEXT to tell you
+ # what file suffixes are executable, so just pass on cmd as-is.
+ files = [cmd]
+
+ seen = set()
+ for dir in path:
+ dir = os.path.normcase(dir)
+ if not dir in seen:
+ seen.add(dir)
+ for thefile in files:
+ name = os.path.join(dir, thefile)
+ if _access_check(name, mode):
+ return name
+ return None
diff --git a/Lib/site.py b/Lib/site.py
index a2c0bec..0aaf46b 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -13,6 +13,19 @@ prefixes directly, as well as with lib/site-packages appended. The
resulting directories, if they exist, are appended to sys.path, and
also inspected for path configuration files.
+If a file named "pyvenv.cfg" exists one directory above sys.executable,
+sys.prefix and sys.exec_prefix are set to that directory and
+it is also checked for site-packages and site-python (sys.base_prefix and
+sys.base_exec_prefix will always be the "real" prefixes of the Python
+installation). If "pyvenv.cfg" (a bootstrap configuration file) contains
+the key "include-system-site-packages" set to anything other than "false"
+(case-insensitive), the system-level prefixes will still also be
+searched for site-packages; otherwise they won't.
+
+All of the resulting site-specific directories, if they exist, are
+appended to sys.path, and also inspected for path configuration
+files.
+
A path configuration file is a file whose name has the form
<package>.pth; its contents are additional directories (one per line)
to be added to sys.path. Non-existing directories (or
@@ -54,8 +67,8 @@ ImportError exception, it is silently ignored.
import sys
import os
+import re
import builtins
-import traceback
# Prefixes for site-packages; add additional prefixes like /usr/local here
PREFIXES = [sys.prefix, sys.exec_prefix]
@@ -82,7 +95,8 @@ def makepath(*paths):
def abs_paths():
"""Set all module __file__ and __cached__ attributes to an absolute path"""
for m in set(sys.modules.values()):
- if hasattr(m, '__loader__'):
+ if (getattr(getattr(m, '__loader__', None), '__module__', None) !=
+ '_frozen_importlib'):
continue # don't mess with a PEP 302-supplied __file__
try:
m.__file__ = os.path.abspath(m.__file__)
@@ -138,7 +152,7 @@ def addpackage(sitedir, name, known_paths):
reset = 0
fullname = os.path.join(sitedir, name)
try:
- f = open(fullname, "rU")
+ f = open(fullname, "r")
except IOError:
return
with f:
@@ -154,9 +168,10 @@ def addpackage(sitedir, name, known_paths):
if not dircase in known_paths and os.path.exists(dir):
sys.path.append(dir)
known_paths.add(dircase)
- except Exception as err:
+ except Exception:
print("Error processing line {:d} of {}:\n".format(n+1, fullname),
file=sys.stderr)
+ import traceback
for record in traceback.format_exception(*sys.exc_info()):
for line in record.splitlines():
print(' '+line, file=sys.stderr)
@@ -178,6 +193,7 @@ def addsitedir(sitedir, known_paths=None):
sitedir, sitedircase = makepath(sitedir)
if not sitedircase in known_paths:
sys.path.append(sitedir) # Add path component
+ known_paths.add(sitedircase)
try:
names = os.listdir(sitedir)
except os.error:
@@ -241,7 +257,6 @@ def getusersitepackages():
return USER_SITE
from sysconfig import get_path
- import os
if sys.platform == 'darwin':
from sysconfig import get_config_var
@@ -266,18 +281,21 @@ def addusersitepackages(known_paths):
addsitedir(user_site, known_paths)
return known_paths
-def getsitepackages():
+def getsitepackages(prefixes=None):
"""Returns a list containing all global site-packages directories
(and possibly site-python).
- For each directory present in the global ``PREFIXES``, this function
- will find its `site-packages` subdirectory depending on the system
- environment, and will return a list of full paths.
+ For each directory present in ``prefixes`` (or the global ``PREFIXES``),
+ this function will find its `site-packages` subdirectory depending on the
+ system environment, and will return a list of full paths.
"""
sitepackages = []
seen = set()
- for prefix in PREFIXES:
+ if prefixes is None:
+ prefixes = PREFIXES
+
+ for prefix in prefixes:
if not prefix or prefix in seen:
continue
seen.add(prefix)
@@ -303,9 +321,9 @@ def getsitepackages():
sys.version[:3], "site-packages"))
return sitepackages
-def addsitepackages(known_paths):
+def addsitepackages(known_paths, prefixes=None):
"""Add site-packages (and possibly site-python) to sys.path"""
- for sitedir in getsitepackages():
+ for sitedir in getsitepackages(prefixes):
if os.path.isdir(sitedir):
addsitedir(sitedir, known_paths)
@@ -385,7 +403,7 @@ class _Printer(object):
for filename in self.__files:
filename = os.path.join(dir, filename)
try:
- fp = open(filename, "rU")
+ fp = open(filename, "r")
data = fp.read()
fp.close()
break
@@ -475,6 +493,61 @@ def aliasmbcs():
encodings.aliases.aliases[enc] = 'mbcs'
+CONFIG_LINE = re.compile(r'^(?P<key>(\w|[-_])+)\s*=\s*(?P<value>.*)\s*$')
+
+def venv(known_paths):
+ global PREFIXES, ENABLE_USER_SITE
+
+ env = os.environ
+ if sys.platform == 'darwin' and '__PYVENV_LAUNCHER__' in env:
+ executable = os.environ['__PYVENV_LAUNCHER__']
+ else:
+ executable = sys.executable
+ executable_dir, executable_name = os.path.split(executable)
+ site_prefix = os.path.dirname(executable_dir)
+ sys._home = None
+ if sys.platform == 'win32':
+ executable_name = os.path.splitext(executable_name)[0]
+ conf_basename = 'pyvenv.cfg'
+ candidate_confs = [
+ conffile for conffile in (
+ os.path.join(executable_dir, conf_basename),
+ os.path.join(site_prefix, conf_basename)
+ )
+ if os.path.isfile(conffile)
+ ]
+
+ if candidate_confs:
+ virtual_conf = candidate_confs[0]
+ system_site = "true"
+ with open(virtual_conf) as f:
+ for line in f:
+ line = line.strip()
+ m = CONFIG_LINE.match(line)
+ if m:
+ d = m.groupdict()
+ key, value = d['key'].lower(), d['value']
+ if key == 'include-system-site-packages':
+ system_site = value.lower()
+ elif key == 'home':
+ sys._home = value
+
+ sys.prefix = sys.exec_prefix = site_prefix
+
+ # Doing this here ensures venv takes precedence over user-site
+ addsitepackages(known_paths, [sys.prefix])
+
+ # addsitepackages will process site_prefix again if it's in PREFIXES,
+ # but that's ok; known_paths will prevent anything being added twice
+ if system_site == "true":
+ PREFIXES.insert(0, sys.prefix)
+ else:
+ PREFIXES = [sys.prefix]
+ ENABLE_USER_SITE = False
+
+ return known_paths
+
+
def execsitecustomize():
"""Run custom site specific code, if available."""
try:
@@ -508,10 +581,16 @@ def execusercustomize():
def main():
+ """Add standard site-specific directories to the module search path.
+
+ This function is called automatically when this module is imported,
+ unless the python interpreter was started with the -S flag.
+ """
global ENABLE_USER_SITE
abs_paths()
known_paths = removeduppaths()
+ known_paths = venv(known_paths)
if ENABLE_USER_SITE is None:
ENABLE_USER_SITE = check_enableusersite()
known_paths = addusersitepackages(known_paths)
@@ -526,7 +605,10 @@ def main():
if ENABLE_USER_SITE:
execusercustomize()
-main()
+# Prevent extending sys.path when python was started with -S and
+# site is imported later.
+if not sys.flags.no_site:
+ main()
def _script():
help = """\
diff --git a/Lib/smtpd.py b/Lib/smtpd.py
index 8cd405c..778d6d6 100755
--- a/Lib/smtpd.py
+++ b/Lib/smtpd.py
@@ -1,5 +1,5 @@
#! /usr/bin/env python3
-"""An RFC 2821 smtp proxy.
+"""An RFC 5321 smtp proxy.
Usage: %(program)s [options] [localhost:localport [remotehost:remoteport]]
@@ -20,6 +20,11 @@ Options:
Use `classname' as the concrete SMTP proxy class. Uses `PureProxy' by
default.
+ --size limit
+ -s limit
+ Restrict the total size of the incoming message to "limit" number of
+ bytes via the RFC 1870 SIZE extension. Defaults to 33554432 bytes.
+
--debug
-d
Turn on debugging prints.
@@ -35,10 +40,9 @@ given then 8025 is used. If remotehost is not given then `localhost' is used,
and if remoteport is not given, then 25 is used.
"""
-
# Overview:
#
-# This file implements the minimal SMTP protocol as defined in RFC 821. It
+# This file implements the minimal SMTP protocol as defined in RFC 5321. It
# has a hierarchy of classes which implement the backend functionality for the
# smtpd. A number of classes are provided:
#
@@ -66,7 +70,7 @@ and if remoteport is not given, then 25 is used.
#
# - support mailbox delivery
# - alias files
-# - ESMTP
+# - Handle more ESMTP extensions
# - handle error codes from the backend smtpd
import sys
@@ -77,12 +81,14 @@ import time
import socket
import asyncore
import asynchat
+import collections
from warnings import warn
+from email._header_value_parser import get_addr_spec, get_angle_addr
__all__ = ["SMTPServer","DebuggingServer","PureProxy","MailmanProxy"]
program = sys.argv[0]
-__version__ = 'Python SMTP proxy version 0.2'
+__version__ = 'Python SMTP proxy version 0.3'
class Devnull:
@@ -94,9 +100,9 @@ DEBUGSTREAM = Devnull()
NEWLINE = '\n'
EMPTYSTRING = ''
COMMASPACE = ', '
+DATA_SIZE_DEFAULT = 33554432
-
def usage(code, msg=''):
print(__doc__ % globals(), file=sys.stderr)
if msg:
@@ -104,19 +110,23 @@ def usage(code, msg=''):
sys.exit(code)
-
class SMTPChannel(asynchat.async_chat):
COMMAND = 0
DATA = 1
- data_size_limit = 33554432
command_size_limit = 512
+ command_size_limits = collections.defaultdict(lambda x=command_size_limit: x)
+ command_size_limits.update({
+ 'MAIL': command_size_limit + 26,
+ })
+ max_command_size_limit = max(command_size_limits.values())
- def __init__(self, server, conn, addr):
+ def __init__(self, server, conn, addr, data_size_limit=DATA_SIZE_DEFAULT):
asynchat.async_chat.__init__(self, conn)
self.smtp_server = server
self.conn = conn
self.addr = addr
+ self.data_size_limit = data_size_limit
self.received_lines = []
self.smtp_state = self.COMMAND
self.seen_greeting = ''
@@ -137,127 +147,128 @@ class SMTPChannel(asynchat.async_chat):
print('Peer:', repr(self.peer), file=DEBUGSTREAM)
self.push('220 %s %s' % (self.fqdn, __version__))
self.set_terminator(b'\r\n')
+ self.extended_smtp = False
# properties for backwards-compatibility
@property
def __server(self):
warn("Access to __server attribute on SMTPChannel is deprecated, "
- "use 'smtp_server' instead", PendingDeprecationWarning, 2)
+ "use 'smtp_server' instead", DeprecationWarning, 2)
return self.smtp_server
@__server.setter
def __server(self, value):
warn("Setting __server attribute on SMTPChannel is deprecated, "
- "set 'smtp_server' instead", PendingDeprecationWarning, 2)
+ "set 'smtp_server' instead", DeprecationWarning, 2)
self.smtp_server = value
@property
def __line(self):
warn("Access to __line attribute on SMTPChannel is deprecated, "
- "use 'received_lines' instead", PendingDeprecationWarning, 2)
+ "use 'received_lines' instead", DeprecationWarning, 2)
return self.received_lines
@__line.setter
def __line(self, value):
warn("Setting __line attribute on SMTPChannel is deprecated, "
- "set 'received_lines' instead", PendingDeprecationWarning, 2)
+ "set 'received_lines' instead", DeprecationWarning, 2)
self.received_lines = value
@property
def __state(self):
warn("Access to __state attribute on SMTPChannel is deprecated, "
- "use 'smtp_state' instead", PendingDeprecationWarning, 2)
+ "use 'smtp_state' instead", DeprecationWarning, 2)
return self.smtp_state
@__state.setter
def __state(self, value):
warn("Setting __state attribute on SMTPChannel is deprecated, "
- "set 'smtp_state' instead", PendingDeprecationWarning, 2)
+ "set 'smtp_state' instead", DeprecationWarning, 2)
self.smtp_state = value
@property
def __greeting(self):
warn("Access to __greeting attribute on SMTPChannel is deprecated, "
- "use 'seen_greeting' instead", PendingDeprecationWarning, 2)
+ "use 'seen_greeting' instead", DeprecationWarning, 2)
return self.seen_greeting
@__greeting.setter
def __greeting(self, value):
warn("Setting __greeting attribute on SMTPChannel is deprecated, "
- "set 'seen_greeting' instead", PendingDeprecationWarning, 2)
+ "set 'seen_greeting' instead", DeprecationWarning, 2)
self.seen_greeting = value
@property
def __mailfrom(self):
warn("Access to __mailfrom attribute on SMTPChannel is deprecated, "
- "use 'mailfrom' instead", PendingDeprecationWarning, 2)
+ "use 'mailfrom' instead", DeprecationWarning, 2)
return self.mailfrom
@__mailfrom.setter
def __mailfrom(self, value):
warn("Setting __mailfrom attribute on SMTPChannel is deprecated, "
- "set 'mailfrom' instead", PendingDeprecationWarning, 2)
+ "set 'mailfrom' instead", DeprecationWarning, 2)
self.mailfrom = value
@property
def __rcpttos(self):
warn("Access to __rcpttos attribute on SMTPChannel is deprecated, "
- "use 'rcpttos' instead", PendingDeprecationWarning, 2)
+ "use 'rcpttos' instead", DeprecationWarning, 2)
return self.rcpttos
@__rcpttos.setter
def __rcpttos(self, value):
warn("Setting __rcpttos attribute on SMTPChannel is deprecated, "
- "set 'rcpttos' instead", PendingDeprecationWarning, 2)
+ "set 'rcpttos' instead", DeprecationWarning, 2)
self.rcpttos = value
@property
def __data(self):
warn("Access to __data attribute on SMTPChannel is deprecated, "
- "use 'received_data' instead", PendingDeprecationWarning, 2)
+ "use 'received_data' instead", DeprecationWarning, 2)
return self.received_data
@__data.setter
def __data(self, value):
warn("Setting __data attribute on SMTPChannel is deprecated, "
- "set 'received_data' instead", PendingDeprecationWarning, 2)
+ "set 'received_data' instead", DeprecationWarning, 2)
self.received_data = value
@property
def __fqdn(self):
warn("Access to __fqdn attribute on SMTPChannel is deprecated, "
- "use 'fqdn' instead", PendingDeprecationWarning, 2)
+ "use 'fqdn' instead", DeprecationWarning, 2)
return self.fqdn
@__fqdn.setter
def __fqdn(self, value):
warn("Setting __fqdn attribute on SMTPChannel is deprecated, "
- "set 'fqdn' instead", PendingDeprecationWarning, 2)
+ "set 'fqdn' instead", DeprecationWarning, 2)
self.fqdn = value
@property
def __peer(self):
warn("Access to __peer attribute on SMTPChannel is deprecated, "
- "use 'peer' instead", PendingDeprecationWarning, 2)
+ "use 'peer' instead", DeprecationWarning, 2)
return self.peer
@__peer.setter
def __peer(self, value):
warn("Setting __peer attribute on SMTPChannel is deprecated, "
- "set 'peer' instead", PendingDeprecationWarning, 2)
+ "set 'peer' instead", DeprecationWarning, 2)
self.peer = value
@property
def __conn(self):
warn("Access to __conn attribute on SMTPChannel is deprecated, "
- "use 'conn' instead", PendingDeprecationWarning, 2)
+ "use 'conn' instead", DeprecationWarning, 2)
return self.conn
@__conn.setter
def __conn(self, value):
warn("Setting __conn attribute on SMTPChannel is deprecated, "
- "set 'conn' instead", PendingDeprecationWarning, 2)
+ "set 'conn' instead", DeprecationWarning, 2)
self.conn = value
@property
def __addr(self):
warn("Access to __addr attribute on SMTPChannel is deprecated, "
- "use 'addr' instead", PendingDeprecationWarning, 2)
+ "use 'addr' instead", DeprecationWarning, 2)
return self.addr
@__addr.setter
def __addr(self, value):
warn("Setting __addr attribute on SMTPChannel is deprecated, "
- "set 'addr' instead", PendingDeprecationWarning, 2)
+ "set 'addr' instead", DeprecationWarning, 2)
self.addr = value
# Overrides base class for convenience
@@ -268,14 +279,14 @@ class SMTPChannel(asynchat.async_chat):
def collect_incoming_data(self, data):
limit = None
if self.smtp_state == self.COMMAND:
- limit = self.command_size_limit
+ limit = self.max_command_size_limit
elif self.smtp_state == self.DATA:
limit = self.data_size_limit
if limit and self.num_bytes > limit:
return
elif limit:
self.num_bytes += len(data)
- self.received_lines.append(str(data, "utf8"))
+ self.received_lines.append(str(data, "utf-8"))
# Implementation of base class abstract method
def found_terminator(self):
@@ -283,11 +294,7 @@ class SMTPChannel(asynchat.async_chat):
print('Data:', repr(line), file=DEBUGSTREAM)
self.received_lines = []
if self.smtp_state == self.COMMAND:
- if self.num_bytes > self.command_size_limit:
- self.push('500 Error: line too long')
- self.num_bytes = 0
- return
- self.num_bytes = 0
+ sz, self.num_bytes = self.num_bytes, 0
if not line:
self.push('500 Error: bad syntax')
return
@@ -299,9 +306,14 @@ class SMTPChannel(asynchat.async_chat):
else:
command = line[:i].upper()
arg = line[i+1:].strip()
+ max_sz = (self.command_size_limits[command]
+ if self.extended_smtp else self.command_size_limit)
+ if sz > max_sz:
+ self.push('500 Error: line too long')
+ return
method = getattr(self, 'smtp_' + command, None)
if not method:
- self.push('502 Error: command "%s" not implemented' % command)
+ self.push('500 Error: command "%s" not recognized' % command)
return
method(arg)
return
@@ -310,12 +322,12 @@ class SMTPChannel(asynchat.async_chat):
self.push('451 Internal confusion')
self.num_bytes = 0
return
- if self.num_bytes > self.data_size_limit:
+ if self.data_size_limit and self.num_bytes > self.data_size_limit:
self.push('552 Error: Too much mail data')
self.num_bytes = 0
return
# Remove extraneous carriage returns and de-transparency according
- # to RFC 821, Section 4.5.2.
+ # to RFC 5321, Section 4.5.2.
data = []
for text in line.split('\r\n'):
if text and text[0] == '.':
@@ -333,7 +345,7 @@ class SMTPChannel(asynchat.async_chat):
self.num_bytes = 0
self.set_terminator(b'\r\n')
if not status:
- self.push('250 Ok')
+ self.push('250 OK')
else:
self.push(status)
@@ -346,58 +358,188 @@ class SMTPChannel(asynchat.async_chat):
self.push('503 Duplicate HELO/EHLO')
else:
self.seen_greeting = arg
+ self.extended_smtp = False
self.push('250 %s' % self.fqdn)
+ def smtp_EHLO(self, arg):
+ if not arg:
+ self.push('501 Syntax: EHLO hostname')
+ return
+ if self.seen_greeting:
+ self.push('503 Duplicate HELO/EHLO')
+ else:
+ self.seen_greeting = arg
+ self.extended_smtp = True
+ self.push('250-%s' % self.fqdn)
+ if self.data_size_limit:
+ self.push('250-SIZE %s' % self.data_size_limit)
+ self.push('250 HELP')
+
def smtp_NOOP(self, arg):
if arg:
self.push('501 Syntax: NOOP')
else:
- self.push('250 Ok')
+ self.push('250 OK')
def smtp_QUIT(self, arg):
# args is ignored
self.push('221 Bye')
self.close_when_done()
- # factored
- def __getaddr(self, keyword, arg):
- address = None
+ def _strip_command_keyword(self, keyword, arg):
keylen = len(keyword)
if arg[:keylen].upper() == keyword:
- address = arg[keylen:].strip()
- if not address:
- pass
- elif address[0] == '<' and address[-1] == '>' and address != '<>':
- # Addresses can be in the form <person@dom.com> but watch out
- # for null address, e.g. <>
- address = address[1:-1]
- return address
+ return arg[keylen:].strip()
+ return ''
+
+ def _getaddr(self, arg):
+ if not arg:
+ return '', ''
+ if arg.lstrip().startswith('<'):
+ address, rest = get_angle_addr(arg)
+ else:
+ address, rest = get_addr_spec(arg)
+ if not address:
+ return address, rest
+ return address.addr_spec, rest
+
+ def _getparams(self, params):
+ # Return any parameters that appear to be syntactically valid according
+ # to RFC 1869, ignore all others. (Postel rule: accept what we can.)
+ params = [param.split('=', 1) for param in params.split()
+ if '=' in param]
+ return {k: v for k, v in params if k.isalnum()}
+
+ def smtp_HELP(self, arg):
+ if arg:
+ extended = ' [SP <mail parameters]'
+ lc_arg = arg.upper()
+ if lc_arg == 'EHLO':
+ self.push('250 Syntax: EHLO hostname')
+ elif lc_arg == 'HELO':
+ self.push('250 Syntax: HELO hostname')
+ elif lc_arg == 'MAIL':
+ msg = '250 Syntax: MAIL FROM: <address>'
+ if self.extended_smtp:
+ msg += extended
+ self.push(msg)
+ elif lc_arg == 'RCPT':
+ msg = '250 Syntax: RCPT TO: <address>'
+ if self.extended_smtp:
+ msg += extended
+ self.push(msg)
+ elif lc_arg == 'DATA':
+ self.push('250 Syntax: DATA')
+ elif lc_arg == 'RSET':
+ self.push('250 Syntax: RSET')
+ elif lc_arg == 'NOOP':
+ self.push('250 Syntax: NOOP')
+ elif lc_arg == 'QUIT':
+ self.push('250 Syntax: QUIT')
+ elif lc_arg == 'VRFY':
+ self.push('250 Syntax: VRFY <address>')
+ else:
+ self.push('501 Supported commands: EHLO HELO MAIL RCPT '
+ 'DATA RSET NOOP QUIT VRFY')
+ else:
+ self.push('250 Supported commands: EHLO HELO MAIL RCPT DATA '
+ 'RSET NOOP QUIT VRFY')
+
+ def smtp_VRFY(self, arg):
+ if arg:
+ address, params = self._getaddr(arg)
+ if address:
+ self.push('252 Cannot VRFY user, but will accept message '
+ 'and attempt delivery')
+ else:
+ self.push('502 Could not VRFY %s' % arg)
+ else:
+ self.push('501 Syntax: VRFY <address>')
def smtp_MAIL(self, arg):
+ if not self.seen_greeting:
+ self.push('503 Error: send HELO first');
+ return
print('===> MAIL', arg, file=DEBUGSTREAM)
- address = self.__getaddr('FROM:', arg) if arg else None
+ syntaxerr = '501 Syntax: MAIL FROM: <address>'
+ if self.extended_smtp:
+ syntaxerr += ' [SP <mail-parameters>]'
+ if arg is None:
+ self.push(syntaxerr)
+ return
+ arg = self._strip_command_keyword('FROM:', arg)
+ address, params = self._getaddr(arg)
+ if not address:
+ self.push(syntaxerr)
+ return
+ if not self.extended_smtp and params:
+ self.push(syntaxerr)
+ return
if not address:
- self.push('501 Syntax: MAIL FROM:<address>')
+ self.push(syntaxerr)
return
if self.mailfrom:
self.push('503 Error: nested MAIL command')
return
+ params = self._getparams(params.upper())
+ if params is None:
+ self.push(syntaxerr)
+ return
+ size = params.pop('SIZE', None)
+ if size:
+ if not size.isdigit():
+ self.push(syntaxerr)
+ return
+ elif self.data_size_limit and int(size) > self.data_size_limit:
+ self.push('552 Error: message size exceeds fixed maximum message size')
+ return
+ if len(params.keys()) > 0:
+ self.push('555 MAIL FROM parameters not recognized or not implemented')
+ return
self.mailfrom = address
print('sender:', self.mailfrom, file=DEBUGSTREAM)
- self.push('250 Ok')
+ self.push('250 OK')
def smtp_RCPT(self, arg):
+ if not self.seen_greeting:
+ self.push('503 Error: send HELO first');
+ return
print('===> RCPT', arg, file=DEBUGSTREAM)
if not self.mailfrom:
self.push('503 Error: need MAIL command')
return
- address = self.__getaddr('TO:', arg) if arg else None
+ syntaxerr = '501 Syntax: RCPT TO: <address>'
+ if self.extended_smtp:
+ syntaxerr += ' [SP <mail-parameters>]'
+ if arg is None:
+ self.push(syntaxerr)
+ return
+ arg = self._strip_command_keyword('TO:', arg)
+ address, params = self._getaddr(arg)
+ if not address:
+ self.push(syntaxerr)
+ return
+ if params:
+ if self.extended_smtp:
+ params = self._getparams(params.upper())
+ if params is None:
+ self.push(syntaxerr)
+ return
+ else:
+ self.push(syntaxerr)
+ return
+ if not address:
+ self.push(syntaxerr)
+ return
+ if params and len(params.keys()) > 0:
+ self.push('555 RCPT TO parameters not recognized or not implemented')
+ return
if not address:
self.push('501 Syntax: RCPT TO: <address>')
return
self.rcpttos.append(address)
print('recips:', self.rcpttos, file=DEBUGSTREAM)
- self.push('250 Ok')
+ self.push('250 OK')
def smtp_RSET(self, arg):
if arg:
@@ -408,9 +550,12 @@ class SMTPChannel(asynchat.async_chat):
self.rcpttos = []
self.received_data = ''
self.smtp_state = self.COMMAND
- self.push('250 Ok')
+ self.push('250 OK')
def smtp_DATA(self, arg):
+ if not self.seen_greeting:
+ self.push('503 Error: send HELO first');
+ return
if not self.rcpttos:
self.push('503 Error: need RCPT command')
return
@@ -421,15 +566,20 @@ class SMTPChannel(asynchat.async_chat):
self.set_terminator(b'\r\n.\r\n')
self.push('354 End data with <CR><LF>.<CR><LF>')
+ # Commands that have not been implemented
+ def smtp_EXPN(self, arg):
+ self.push('502 EXPN not implemented')
+
-
class SMTPServer(asyncore.dispatcher):
# SMTPChannel class to use for managing client connections
channel_class = SMTPChannel
- def __init__(self, localaddr, remoteaddr):
+ def __init__(self, localaddr, remoteaddr,
+ data_size_limit=DATA_SIZE_DEFAULT):
self._localaddr = localaddr
self._remoteaddr = remoteaddr
+ self.data_size_limit = data_size_limit
asyncore.dispatcher.__init__(self)
try:
self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -447,7 +597,7 @@ class SMTPServer(asyncore.dispatcher):
def handle_accepted(self, conn, addr):
print('Incoming connection from %s' % repr(addr), file=DEBUGSTREAM)
- channel = self.channel_class(self, conn, addr)
+ channel = self.channel_class(self, conn, addr, self.data_size_limit)
# API for "doing something useful with the message"
def process_message(self, peer, mailfrom, rcpttos, data):
@@ -475,7 +625,6 @@ class SMTPServer(asyncore.dispatcher):
raise NotImplementedError
-
class DebuggingServer(SMTPServer):
# Do something with the gathered message
def process_message(self, peer, mailfrom, rcpttos, data):
@@ -491,7 +640,6 @@ class DebuggingServer(SMTPServer):
print('------------ END MESSAGE ------------')
-
class PureProxy(SMTPServer):
def process_message(self, peer, mailfrom, rcpttos, data):
lines = data.split('\n')
@@ -532,7 +680,6 @@ class PureProxy(SMTPServer):
return refused
-
class MailmanProxy(PureProxy):
def process_message(self, peer, mailfrom, rcpttos, data):
from io import StringIO
@@ -611,19 +758,18 @@ class MailmanProxy(PureProxy):
msg.Enqueue(mlist, torequest=1)
-
class Options:
setuid = 1
classname = 'PureProxy'
+ size_limit = None
-
def parseargs():
global DEBUGSTREAM
try:
opts, args = getopt.getopt(
- sys.argv[1:], 'nVhc:d',
- ['class=', 'nosetuid', 'version', 'help', 'debug'])
+ sys.argv[1:], 'nVhc:s:d',
+ ['class=', 'nosetuid', 'version', 'help', 'size=', 'debug'])
except getopt.error as e:
usage(1, e)
@@ -640,6 +786,13 @@ def parseargs():
options.classname = arg
elif opt in ('-d', '--debug'):
DEBUGSTREAM = sys.stderr
+ elif opt in ('-s', '--size'):
+ try:
+ int_size = int(arg)
+ options.size_limit = int_size
+ except:
+ print('Invalid size: ' + arg, file=sys.stderr)
+ sys.exit(1)
# parse the rest of the arguments
if len(args) < 1:
@@ -674,7 +827,6 @@ def parseargs():
return options
-
if __name__ == '__main__':
options = parseargs()
# Become nobody
@@ -687,7 +839,8 @@ if __name__ == '__main__':
import __main__ as mod
class_ = getattr(mod, classname)
proxy = class_((options.localhost, options.localport),
- (options.remotehost, options.remoteport))
+ (options.remotehost, options.remoteport),
+ options.size_limit)
if options.setuid:
try:
import pwd
diff --git a/Lib/smtplib.py b/Lib/smtplib.py
index fbef96e..d37b0e2 100644
--- a/Lib/smtplib.py
+++ b/Lib/smtplib.py
@@ -133,24 +133,18 @@ class SMTPAuthenticationError(SMTPResponseException):
combination provided.
"""
-def quoteaddr(addr):
+def quoteaddr(addrstring):
"""Quote a subset of the email addresses defined by RFC 821.
Should be able to handle anything email.utils.parseaddr can handle.
"""
- m = (None, None)
- try:
- m = email.utils.parseaddr(addr)[1]
- except AttributeError:
- pass
- if m == (None, None): # Indicates parse failure or AttributeError
- # something weird here.. punt -ddm
- return "<%s>" % addr
- elif m is None:
- # the sender wants an empty return address
- return "<>"
- else:
- return "<%s>" % m
+ displayname, addr = email.utils.parseaddr(addrstring)
+ if (displayname, addr) == ('', ''):
+ # parseaddr couldn't parse it, use it as is and hope for the best.
+ if addrstring.strip().startswith('<'):
+ return addrstring
+ return "<%s>" % addrstring
+ return "<%s>" % addr
def _addr_only(addrstring):
displayname, addr = email.utils.parseaddr(addrstring)
@@ -180,27 +174,6 @@ try:
except ImportError:
_have_ssl = False
else:
- class SSLFakeFile:
- """A fake file like object that really wraps a SSLObject.
-
- It only supports what is needed in smtplib.
- """
- def __init__(self, sslobj):
- self.sslobj = sslobj
-
- def readline(self):
- str = b""
- chr = None
- while chr != b"\n":
- chr = self.sslobj.read(1)
- if not chr:
- break
- str += chr
- return str
-
- def close(self):
- pass
-
_have_ssl = True
@@ -242,7 +215,8 @@ class SMTP:
default_port = SMTP_PORT
def __init__(self, host='', port=0, local_hostname=None,
- timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+ timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None):
"""Initialize a new instance.
If specified, `host' is the name of the remote host to which to
@@ -250,11 +224,16 @@ class SMTP:
By default, smtplib.SMTP_PORT is used. An SMTPConnectError is raised
if the specified `host' doesn't respond correctly. If specified,
`local_hostname` is used as the FQDN of the local host. By default,
- the local hostname is found using socket.getfqdn().
+ the local hostname is found using socket.getfqdn(). The
+ `source_address` parameter takes a 2-tuple (host, port) for the socket
+ to bind to as its source address before connecting. If the host is ''
+ and port is 0, the OS default behavior will be used.
"""
self.timeout = timeout
self.esmtp_features = {}
+ self.source_address = source_address
+
if host:
(code, msg) = self.connect(host, port)
if code != 220:
@@ -277,6 +256,19 @@ class SMTP:
pass
self.local_hostname = '[%s]' % addr
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args):
+ try:
+ code, message = self.docmd("QUIT")
+ if code != 221:
+ raise SMTPResponseException(code, message)
+ except SMTPServerDisconnected:
+ pass
+ finally:
+ self.close()
+
def set_debuglevel(self, debuglevel):
"""Set the debug output level.
@@ -290,10 +282,12 @@ class SMTP:
# This makes it simpler for SMTP_SSL to use the SMTP connect code
# and just alter the socket connection bit.
if self.debuglevel > 0:
- print('connect:', (host, port), file=stderr)
- return socket.create_connection((host, port), timeout)
+ print('connect: to', (host, port), self.source_address,
+ file=stderr)
+ return socket.create_connection((host, port), timeout,
+ self.source_address)
- def connect(self, host='localhost', port=0):
+ def connect(self, host='localhost', port=0, source_address=None):
"""Connect to a host on a given port.
If the hostname ends with a colon (`:') followed by a number, and
@@ -304,6 +298,10 @@ class SMTP:
specified during instantiation.
"""
+
+ if source_address:
+ self.source_address = source_address
+
if not port and (host.find(':') == host.rfind(':')):
i = host.rfind(':')
if i >= 0:
@@ -317,6 +315,7 @@ class SMTP:
if self.debuglevel > 0:
print('connect:', (host, port), file=stderr)
self.sock = self._get_socket(host, port, self.timeout)
+ self.file = None
(code, msg) = self.getreply()
if self.debuglevel > 0:
print("connect:", msg, file=stderr)
@@ -388,7 +387,8 @@ class SMTP:
errmsg = b"\n".join(resp)
if self.debuglevel > 0:
- print('reply: retcode (%s); Msg: %s' % (errcode, errmsg), file=stderr)
+ print('reply: retcode (%s); Msg: %s' % (errcode, errmsg),
+ file=stderr)
return errcode, errmsg
def docmd(self, cmd, args=""):
@@ -632,7 +632,7 @@ class SMTP:
# We could not login sucessfully. Return result of last attempt.
raise SMTPAuthenticationError(code, resp)
- def starttls(self, keyfile=None, certfile=None):
+ def starttls(self, keyfile=None, certfile=None, context=None):
"""Puts the connection to the SMTP server into TLS mode.
If there has been no previous EHLO or HELO command this session, this
@@ -656,8 +656,17 @@ class SMTP:
if resp == 220:
if not _have_ssl:
raise RuntimeError("No SSL support included in this Python")
- self.sock = ssl.wrap_socket(self.sock, keyfile, certfile)
- self.file = SSLFakeFile(self.sock)
+ if context is not None and keyfile is not None:
+ raise ValueError("context and keyfile arguments are mutually "
+ "exclusive")
+ if context is not None and certfile is not None:
+ raise ValueError("context and certfile arguments are mutually "
+ "exclusive")
+ if context is not None:
+ self.sock = context.wrap_socket(self.sock)
+ else:
+ self.sock = ssl.wrap_socket(self.sock, keyfile, certfile)
+ self.file = None
# RFC 3207:
# The client MUST discard any knowledge obtained from
# the server, such as the list of SMTP service extensions,
@@ -786,7 +795,8 @@ class SMTP:
# TODO implement heuristics to guess the correct Resent-* block with an
# option allowing the user to enable the heuristics. (It should be
# possible to guess correctly almost all of the time.)
- resent =msg.get_all('Resent-Date')
+
+ resent = msg.get_all('Resent-Date')
if resent is None:
header_prefix = ''
elif len(resent) == 1:
@@ -795,13 +805,13 @@ class SMTP:
raise ValueError("message has more than one 'Resent-' header block")
if from_addr is None:
# Prefer the sender field per RFC 2822:3.6.2.
- from_addr = (msg[header_prefix+'Sender']
- if (header_prefix+'Sender') in msg
- else msg[header_prefix+'From'])
+ from_addr = (msg[header_prefix + 'Sender']
+ if (header_prefix + 'Sender') in msg
+ else msg[header_prefix + 'From'])
if to_addrs is None:
- addr_fields = [f for f in (msg[header_prefix+'To'],
- msg[header_prefix+'Bcc'],
- msg[header_prefix+'Cc']) if f is not None]
+ addr_fields = [f for f in (msg[header_prefix + 'To'],
+ msg[header_prefix + 'Bcc'],
+ msg[header_prefix + 'Cc']) if f is not None]
to_addrs = [a[1] for a in email.utils.getaddresses(addr_fields)]
# Make a local copy so we can delete the bcc headers.
msg_copy = copy.copy(msg)
@@ -835,26 +845,41 @@ if _have_ssl:
""" This is a subclass derived from SMTP that connects over an SSL encrypted
socket (to use this class you need a socket module that was compiled with SSL
support). If host is not specified, '' (the local host) is used. If port is
- omitted, the standard SMTP-over-SSL port (465) is used. keyfile and certfile
+ omitted, the standard SMTP-over-SSL port (465) is used. The optional
+ source_address takes a two-tuple (host, port) for the socket to bind to. keyfile and certfile
are also optional - they can contain a PEM formatted private key and
- certificate chain file for the SSL connection.
+ certificate chain file for the SSL connection. context is also optional; it can contain
+ an SSLContext and is an alternative to keyfile and certfile. If it is specified, both
+ keyfile and certfile must be None.
"""
default_port = SMTP_SSL_PORT
def __init__(self, host='', port=0, local_hostname=None,
keyfile=None, certfile=None,
- timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+ timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None, context=None):
+ if context is not None and keyfile is not None:
+ raise ValueError("context and keyfile arguments are mutually "
+ "exclusive")
+ if context is not None and certfile is not None:
+ raise ValueError("context and certfile arguments are mutually "
+ "exclusive")
self.keyfile = keyfile
self.certfile = certfile
- SMTP.__init__(self, host, port, local_hostname, timeout)
+ self.context = context
+ SMTP.__init__(self, host, port, local_hostname, timeout,
+ source_address)
def _get_socket(self, host, port, timeout):
if self.debuglevel > 0:
print('connect:', (host, port), file=stderr)
- new_socket = socket.create_connection((host, port), timeout)
- new_socket = ssl.wrap_socket(new_socket, self.keyfile, self.certfile)
- self.file = SSLFakeFile(new_socket)
+ new_socket = socket.create_connection((host, port), timeout,
+ self.source_address)
+ if self.context is not None:
+ new_socket = self.context.wrap_socket(new_socket)
+ else:
+ new_socket = ssl.wrap_socket(new_socket, self.keyfile, self.certfile)
return new_socket
__all__.append("SMTP_SSL")
@@ -879,18 +904,21 @@ class LMTP(SMTP):
ehlo_msg = "lhlo"
- def __init__(self, host='', port=LMTP_PORT, local_hostname=None):
+ def __init__(self, host='', port=LMTP_PORT, local_hostname=None,
+ source_address=None):
"""Initialize a new instance."""
- SMTP.__init__(self, host, port, local_hostname)
+ SMTP.__init__(self, host, port, local_hostname=local_hostname,
+ source_address=source_address)
- def connect(self, host='localhost', port=0):
+ def connect(self, host='localhost', port=0, source_address=None):
"""Connect to the LMTP daemon, on either a Unix or a TCP socket."""
if host[0] != '/':
- return SMTP.connect(self, host, port)
+ return SMTP.connect(self, host, port, source_address=source_address)
# Handle Unix-domain sockets.
try:
self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ self.file = None
self.sock.connect(host)
except socket.error as msg:
if self.debuglevel > 0:
diff --git a/Lib/socket.py b/Lib/socket.py
index ea56a67..d4f1b65 100644
--- a/Lib/socket.py
+++ b/Lib/socket.py
@@ -12,6 +12,7 @@ Functions:
socket() -- create a new socket object
socketpair() -- create a pair of new socket objects [*]
fromfd() -- create a socket object from an open file descriptor [*]
+fromshare() -- create a socket object from data received from socket.share() [*]
gethostname() -- return the current hostname
gethostbyname() -- map a hostname to its IP number
gethostbyaddr() -- map an IP number or hostname to DNS info
@@ -53,7 +54,6 @@ try:
except ImportError:
errno = None
EBADF = getattr(errno, 'EBADF', 9)
-EINTR = getattr(errno, 'EINTR', 4)
EAGAIN = getattr(errno, 'EAGAIN', 11)
EWOULDBLOCK = getattr(errno, 'EWOULDBLOCK', 11)
@@ -112,6 +112,9 @@ class socket(_socket.socket):
s[7:])
return s
+ def __getstate__(self):
+ raise TypeError("Cannot serialize socket object")
+
def dup(self):
"""dup() -> socket object
@@ -207,7 +210,6 @@ class socket(_socket.socket):
self._closed = True
return super().detach()
-
def fromfd(fd, family, type, proto=0):
""" fromfd(fd, family, type[, proto]) -> socket object
@@ -217,6 +219,14 @@ def fromfd(fd, family, type, proto=0):
nfd = dup(fd)
return socket(family, type, proto, nfd)
+if hasattr(_socket.socket, "share"):
+ def fromshare(info):
+ """ fromshare(info) -> socket object
+
+ Create a socket object from the bytes object returned by
+ socket.share(pid).
+ """
+ return socket(0, 0, 0, info)
if hasattr(_socket, "socketpair"):
@@ -288,11 +298,10 @@ class SocketIO(io.RawIOBase):
except timeout:
self._timeout_occurred = True
raise
+ except InterruptedError:
+ continue
except error as e:
- n = e.args[0]
- if n == EINTR:
- continue
- if n in _blocking_errnos:
+ if e.args[0] in _blocking_errnos:
return None
raise
diff --git a/Lib/socketserver.py b/Lib/socketserver.py
index adf9f38..a21318d 100644
--- a/Lib/socketserver.py
+++ b/Lib/socketserver.py
@@ -153,8 +153,8 @@ def _eintr_retry(func, *args):
while True:
try:
return func(*args)
- except (OSError, select.error) as e:
- if e.args[0] != errno.EINTR:
+ except OSError as e:
+ if e.errno != errno.EINTR:
raise
class BaseServer:
@@ -180,6 +180,7 @@ class BaseServer:
- process_request(request, client_address)
- shutdown_request(request)
- close_request(request)
+ - service_actions()
- handle_error()
Methods for derived classes:
@@ -236,6 +237,8 @@ class BaseServer:
poll_interval)
if self in r:
self._handle_request_noblock()
+
+ self.service_actions()
finally:
self.__shutdown_request = False
self.__is_shut_down.set()
@@ -250,6 +253,14 @@ class BaseServer:
self.__shutdown_request = True
self.__is_shut_down.wait()
+ def service_actions(self):
+ """Called by the serve_forever() loop.
+
+ May be overridden by a subclass / Mixin to implement any code that
+ needs to be run during the loop.
+ """
+ pass
+
# The distinction between handling, getting, processing and
# finishing a request is fairly arbitrary. Remember:
#
@@ -550,9 +561,15 @@ class ForkingMixIn:
"""
self.collect_children()
+ def service_actions(self):
+ """Collect the zombie child processes regularly in the ForkingMixIn.
+
+ service_actions is called in the BaseServer's serve_forever loop.
+ """
+ self.collect_children()
+
def process_request(self, request, client_address):
"""Fork a new subprocess to process the request."""
- self.collect_children()
pid = os.fork()
if pid:
# Parent process
@@ -560,6 +577,7 @@ class ForkingMixIn:
self.active_children = []
self.active_children.append(pid)
self.close_request(request)
+ return
else:
# Child process.
# This must never return, hence os._exit()!
diff --git a/Lib/sqlite3/test/dbapi.py b/Lib/sqlite3/test/dbapi.py
index 202bd38..b7ec1ad 100644
--- a/Lib/sqlite3/test/dbapi.py
+++ b/Lib/sqlite3/test/dbapi.py
@@ -1,4 +1,4 @@
-#-*- coding: ISO-8859-1 -*-
+#-*- coding: iso-8859-1 -*-
# pysqlite2/test/dbapi.py: tests for DB-API compliance
#
# Copyright (C) 2004-2010 Gerhard Häring <gh@ghaering.de>
diff --git a/Lib/sqlite3/test/factory.py b/Lib/sqlite3/test/factory.py
index 7f6f347..9e833ae 100644
--- a/Lib/sqlite3/test/factory.py
+++ b/Lib/sqlite3/test/factory.py
@@ -1,4 +1,4 @@
-#-*- coding: ISO-8859-1 -*-
+#-*- coding: iso-8859-1 -*-
# pysqlite2/test/factory.py: tests for the various factories in pysqlite
#
# Copyright (C) 2005-2007 Gerhard Häring <gh@ghaering.de>
@@ -178,6 +178,8 @@ class TextFactoryTests(unittest.TestCase):
self.assertTrue(row[0].endswith("reich"), "column must contain original data")
def CheckOptimizedUnicode(self):
+ # In py3k, str objects are always returned when text_factory
+ # is OptimizedUnicode
self.con.text_factory = sqlite.OptimizedUnicode
austria = "Österreich"
germany = "Deutchland"
diff --git a/Lib/sqlite3/test/hooks.py b/Lib/sqlite3/test/hooks.py
index a92e838..3dc44f6 100644
--- a/Lib/sqlite3/test/hooks.py
+++ b/Lib/sqlite3/test/hooks.py
@@ -1,4 +1,4 @@
-#-*- coding: ISO-8859-1 -*-
+#-*- coding: iso-8859-1 -*-
# pysqlite2/test/hooks.py: tests for various SQLite-specific hooks
#
# Copyright (C) 2006-2007 Gerhard Häring <gh@ghaering.de>
@@ -176,10 +176,60 @@ class ProgressTests(unittest.TestCase):
con.execute("select 1 union select 2 union select 3").fetchall()
self.assertEqual(action, 0, "progress handler was not cleared")
+class TraceCallbackTests(unittest.TestCase):
+ def CheckTraceCallbackUsed(self):
+ """
+ Test that the trace callback is invoked once it is set.
+ """
+ con = sqlite.connect(":memory:")
+ traced_statements = []
+ def trace(statement):
+ traced_statements.append(statement)
+ con.set_trace_callback(trace)
+ con.execute("create table foo(a, b)")
+ self.assertTrue(traced_statements)
+ self.assertTrue(any("create table foo" in stmt for stmt in traced_statements))
+
+ def CheckClearTraceCallback(self):
+ """
+ Test that setting the trace callback to None clears the previously set callback.
+ """
+ con = sqlite.connect(":memory:")
+ traced_statements = []
+ def trace(statement):
+ traced_statements.append(statement)
+ con.set_trace_callback(trace)
+ con.set_trace_callback(None)
+ con.execute("create table foo(a, b)")
+ self.assertFalse(traced_statements, "trace callback was not cleared")
+
+ def CheckUnicodeContent(self):
+ """
+ Test that the statement can contain unicode literals.
+ """
+ unicode_value = '\xf6\xe4\xfc\xd6\xc4\xdc\xdf\u20ac'
+ con = sqlite.connect(":memory:")
+ traced_statements = []
+ def trace(statement):
+ traced_statements.append(statement)
+ con.set_trace_callback(trace)
+ con.execute("create table foo(x)")
+ # Can't execute bound parameters as their values don't appear
+ # in traced statements before SQLite 3.6.21
+ # (cf. http://www.sqlite.org/draft/releaselog/3_6_21.html)
+ con.execute('insert into foo(x) values ("%s")' % unicode_value)
+ con.commit()
+ self.assertTrue(any(unicode_value in stmt for stmt in traced_statements),
+ "Unicode data %s garbled in trace callback: %s"
+ % (ascii(unicode_value), ', '.join(map(ascii, traced_statements))))
+
+
+
def suite():
collation_suite = unittest.makeSuite(CollationTests, "Check")
progress_suite = unittest.makeSuite(ProgressTests, "Check")
- return unittest.TestSuite((collation_suite, progress_suite))
+ trace_suite = unittest.makeSuite(TraceCallbackTests, "Check")
+ return unittest.TestSuite((collation_suite, progress_suite, trace_suite))
def test():
runner = unittest.TextTestRunner()
diff --git a/Lib/sqlite3/test/regression.py b/Lib/sqlite3/test/regression.py
index c7551e3..9d7b276 100644
--- a/Lib/sqlite3/test/regression.py
+++ b/Lib/sqlite3/test/regression.py
@@ -1,4 +1,4 @@
-#-*- coding: ISO-8859-1 -*-
+#-*- coding: iso-8859-1 -*-
# pysqlite2/test/regression.py: pysqlite regression tests
#
# Copyright (C) 2006-2010 Gerhard Häring <gh@ghaering.de>
diff --git a/Lib/sqlite3/test/transactions.py b/Lib/sqlite3/test/transactions.py
index 70e96a1..feb4fa1 100644
--- a/Lib/sqlite3/test/transactions.py
+++ b/Lib/sqlite3/test/transactions.py
@@ -1,4 +1,4 @@
-#-*- coding: ISO-8859-1 -*-
+#-*- coding: iso-8859-1 -*-
# pysqlite2/test/transactions.py: tests transactions
#
# Copyright (C) 2005-2007 Gerhard Häring <gh@ghaering.de>
diff --git a/Lib/sqlite3/test/types.py b/Lib/sqlite3/test/types.py
index 29413e1..3b4cb6d 100644
--- a/Lib/sqlite3/test/types.py
+++ b/Lib/sqlite3/test/types.py
@@ -1,4 +1,4 @@
-#-*- coding: ISO-8859-1 -*-
+#-*- coding: iso-8859-1 -*-
# pysqlite2/test/types.py: tests for type conversion and detection
#
# Copyright (C) 2005 Gerhard Häring <gh@ghaering.de>
@@ -85,7 +85,7 @@ class DeclTypesTests(unittest.TestCase):
if isinstance(_val, bytes):
# sqlite3 always calls __init__ with a bytes created from a
# UTF-8 string when __conform__ was used to store the object.
- _val = _val.decode('utf8')
+ _val = _val.decode('utf-8')
self.val = _val
def __cmp__(self, other):
diff --git a/Lib/sqlite3/test/userfunctions.py b/Lib/sqlite3/test/userfunctions.py
index e01341e..14f6b65 100644
--- a/Lib/sqlite3/test/userfunctions.py
+++ b/Lib/sqlite3/test/userfunctions.py
@@ -1,4 +1,4 @@
-#-*- coding: ISO-8859-1 -*-
+#-*- coding: iso-8859-1 -*-
# pysqlite2/test/userfunctions.py: tests for user-defined functions and
# aggregates.
#
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index f52ea01..75f3a09 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -318,11 +318,13 @@ def _optimize_unicode(charset, fixup):
# XXX: could expand category
return charset # cannot compress
except IndexError:
- # non-BMP characters
+ # non-BMP characters; XXX now they should work
return charset
if negate:
if sys.maxunicode != 65535:
# XXX: negation does not work with big charsets
+ # XXX2: now they should work, but removing this will make the
+ # charmap 17 times bigger
return charset
for i in range(65536):
charmap[i] = not charmap[i]
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 13737ca..d358646 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -177,6 +177,7 @@ class SubPattern:
class Tokenizer:
def __init__(self, string):
+ self.istext = isinstance(string, str)
self.string = string
self.index = 0
self.__next()
@@ -187,14 +188,14 @@ class Tokenizer:
char = self.string[self.index:self.index+1]
# Special case for the str8, since indexing returns a integer
# XXX This is only needed for test_bug_926075 in test_re.py
- if char and isinstance(char, bytes):
+ if char and not self.istext:
char = chr(char[0])
if char == "\\":
try:
c = self.string[self.index + 1]
except IndexError:
raise error("bogus escape (end of line)")
- if isinstance(self.string, bytes):
+ if not self.istext:
c = chr(c)
char = char + c
self.index = self.index + len(char)
@@ -209,6 +210,15 @@ class Tokenizer:
this = self.next
self.__next()
return this
+ def getwhile(self, n, charset):
+ result = ''
+ for _ in range(n):
+ c = self.next
+ if c not in charset:
+ break
+ result += c
+ self.__next()
+ return result
def tell(self):
return self.index, self.next
def seek(self, index):
@@ -241,20 +251,30 @@ def _class_escape(source, escape):
c = escape[1:2]
if c == "x":
# hexadecimal escape (exactly two digits)
- while source.next in HEXDIGITS and len(escape) < 4:
- escape = escape + source.get()
- escape = escape[2:]
- if len(escape) != 2:
- raise error("bogus escape: %s" % repr("\\" + escape))
- return LITERAL, int(escape, 16) & 0xff
+ escape += source.getwhile(2, HEXDIGITS)
+ if len(escape) != 4:
+ raise ValueError
+ return LITERAL, int(escape[2:], 16) & 0xff
+ elif c == "u" and source.istext:
+ # unicode escape (exactly four digits)
+ escape += source.getwhile(4, HEXDIGITS)
+ if len(escape) != 6:
+ raise ValueError
+ return LITERAL, int(escape[2:], 16)
+ elif c == "U" and source.istext:
+ # unicode escape (exactly eight digits)
+ escape += source.getwhile(8, HEXDIGITS)
+ if len(escape) != 10:
+ raise ValueError
+ c = int(escape[2:], 16)
+ chr(c) # raise ValueError for invalid code
+ return LITERAL, c
elif c in OCTDIGITS:
# octal escape (up to three digits)
- while source.next in OCTDIGITS and len(escape) < 4:
- escape = escape + source.get()
- escape = escape[1:]
- return LITERAL, int(escape, 8) & 0xff
+ escape += source.getwhile(2, OCTDIGITS)
+ return LITERAL, int(escape[1:], 8) & 0xff
elif c in DIGITS:
- raise error("bogus escape: %s" % repr(escape))
+ raise ValueError
if len(escape) == 2:
return LITERAL, ord(escape[1])
except ValueError:
@@ -273,15 +293,27 @@ def _escape(source, escape, state):
c = escape[1:2]
if c == "x":
# hexadecimal escape
- while source.next in HEXDIGITS and len(escape) < 4:
- escape = escape + source.get()
+ escape += source.getwhile(2, HEXDIGITS)
if len(escape) != 4:
raise ValueError
return LITERAL, int(escape[2:], 16) & 0xff
+ elif c == "u" and source.istext:
+ # unicode escape (exactly four digits)
+ escape += source.getwhile(4, HEXDIGITS)
+ if len(escape) != 6:
+ raise ValueError
+ return LITERAL, int(escape[2:], 16)
+ elif c == "U" and source.istext:
+ # unicode escape (exactly eight digits)
+ escape += source.getwhile(8, HEXDIGITS)
+ if len(escape) != 10:
+ raise ValueError
+ c = int(escape[2:], 16)
+ chr(c) # raise ValueError for invalid code
+ return LITERAL, c
elif c == "0":
# octal escape
- while source.next in OCTDIGITS and len(escape) < 4:
- escape = escape + source.get()
+ escape += source.getwhile(2, OCTDIGITS)
return LITERAL, int(escape[1:], 8) & 0xff
elif c in DIGITS:
# octal escape *or* decimal group reference (sigh)
@@ -791,7 +823,7 @@ def parse_template(source, pattern):
else:
# The tokenizer implicitly decodes bytes objects as latin-1, we must
# therefore re-encode the final representation.
- encode = lambda x: x.encode('latin1')
+ encode = lambda x: x.encode('latin-1')
for c, s in p:
if c is MARK:
groupsappend((i, s))
diff --git a/Lib/ssl.py b/Lib/ssl.py
index 8137231..3162f56 100644
--- a/Lib/ssl.py
+++ b/Lib/ssl.py
@@ -60,10 +60,25 @@ import re
import _ssl # if we can't import it, let the error propagate
from _ssl import OPENSSL_VERSION_NUMBER, OPENSSL_VERSION_INFO, OPENSSL_VERSION
-from _ssl import _SSLContext, SSLError
+from _ssl import _SSLContext
+from _ssl import (
+ SSLError, SSLZeroReturnError, SSLWantReadError, SSLWantWriteError,
+ SSLSyscallError, SSLEOFError,
+ )
from _ssl import CERT_NONE, CERT_OPTIONAL, CERT_REQUIRED
-from _ssl import OP_ALL, OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_TLSv1
-from _ssl import RAND_status, RAND_egd, RAND_add
+from _ssl import (
+ OP_ALL, OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_TLSv1,
+ OP_CIPHER_SERVER_PREFERENCE, OP_SINGLE_DH_USE
+ )
+try:
+ from _ssl import OP_NO_COMPRESSION
+except ImportError:
+ pass
+try:
+ from _ssl import OP_SINGLE_ECDH_USE
+except ImportError:
+ pass
+from _ssl import RAND_status, RAND_egd, RAND_add, RAND_bytes, RAND_pseudo_bytes
from _ssl import (
SSL_ERROR_ZERO_RETURN,
SSL_ERROR_WANT_READ,
@@ -75,8 +90,9 @@ from _ssl import (
SSL_ERROR_EOF,
SSL_ERROR_INVALID_ERROR_CODE,
)
-from _ssl import HAS_SNI
-from _ssl import PROTOCOL_SSLv3, PROTOCOL_SSLv23, PROTOCOL_TLSv1
+from _ssl import HAS_SNI, HAS_ECDH, HAS_NPN
+from _ssl import (PROTOCOL_SSLv3, PROTOCOL_SSLv23,
+ PROTOCOL_TLSv1)
from _ssl import _OPENSSL_API_VERSION
_PROTOCOL_NAMES = {
@@ -94,11 +110,16 @@ else:
from socket import getnameinfo as _getnameinfo
from socket import error as socket_error
-from socket import socket, AF_INET, SOCK_STREAM
+from socket import socket, AF_INET, SOCK_STREAM, create_connection
import base64 # for DER-to-PEM translation
import traceback
import errno
+if _ssl.HAS_TLS_UNIQUE:
+ CHANNEL_BINDING_TYPES = ['tls-unique']
+else:
+ CHANNEL_BINDING_TYPES = []
+
# Disable weak or insecure ciphers by default
# (OpenSSL's default setting is 'DEFAULT:!aNULL:!eNULL')
_DEFAULT_CIPHERS = 'DEFAULT:!aNULL:!eNULL:!LOW:!EXPORT:!SSLv2'
@@ -188,6 +209,17 @@ class SSLContext(_SSLContext):
server_hostname=server_hostname,
_context=self)
+ def set_npn_protocols(self, npn_protocols):
+ protos = bytearray()
+ for protocol in npn_protocols:
+ b = bytes(protocol, 'ascii')
+ if len(b) == 0 or len(b) > 255:
+ raise SSLError('NPN protocols must be 1 to 255 in length')
+ protos.append(len(b))
+ protos.extend(b)
+
+ self._set_npn_protocols(protos)
+
class SSLSocket(socket):
"""This class implements a subtype of socket.socket that wraps
@@ -199,7 +231,7 @@ class SSLSocket(socket):
ssl_version=PROTOCOL_SSLv23, ca_certs=None,
do_handshake_on_connect=True,
family=AF_INET, type=SOCK_STREAM, proto=0, fileno=None,
- suppress_ragged_eofs=True, ciphers=None,
+ suppress_ragged_eofs=True, npn_protocols=None, ciphers=None,
server_hostname=None,
_context=None):
@@ -219,6 +251,8 @@ class SSLSocket(socket):
self.context.load_verify_locations(ca_certs)
if certfile:
self.context.load_cert_chain(certfile, keyfile)
+ if npn_protocols:
+ self.context.set_npn_protocols(npn_protocols)
if ciphers:
self.context.set_ciphers(ciphers)
self.keyfile = keyfile
@@ -319,6 +353,13 @@ class SSLSocket(socket):
self._checkClosed()
return self._sslobj.peer_certificate(binary_form)
+ def selected_npn_protocol(self):
+ self._checkClosed()
+ if not self._sslobj or not _ssl.HAS_NPN:
+ return None
+ else:
+ return self._sslobj.selected_npn_protocol()
+
def cipher(self):
self._checkClosed()
if not self._sslobj:
@@ -326,6 +367,13 @@ class SSLSocket(socket):
else:
return self._sslobj.cipher()
+ def compression(self):
+ self._checkClosed()
+ if not self._sslobj:
+ return None
+ else:
+ return self._sslobj.compression()
+
def send(self, data, flags=0):
self._checkClosed()
if self._sslobj:
@@ -358,6 +406,12 @@ class SSLSocket(socket):
else:
return socket.sendto(self, data, flags_or_addr, addr)
+ def sendmsg(self, *args, **kwargs):
+ # Ensure programs don't send data unencrypted if they try to
+ # use this method.
+ raise NotImplementedError("sendmsg not allowed on instances of %s" %
+ self.__class__)
+
def sendall(self, data, flags=0):
self._checkClosed()
if self._sslobj:
@@ -416,6 +470,14 @@ class SSLSocket(socket):
else:
return socket.recvfrom_into(self, buffer, nbytes, flags)
+ def recvmsg(self, *args, **kwargs):
+ raise NotImplementedError("recvmsg not allowed on instances of %s" %
+ self.__class__)
+
+ def recvmsg_into(self, *args, **kwargs):
+ raise NotImplementedError("recvmsg_into not allowed on instances of "
+ "%s" % self.__class__)
+
def pending(self):
self._checkClosed()
if self._sslobj:
@@ -502,6 +564,21 @@ class SSLSocket(socket):
self.do_handshake_on_connect),
addr)
+ def get_channel_binding(self, cb_type="tls-unique"):
+ """Get channel binding data for current connection. Raise ValueError
+ if the requested `cb_type` is not supported. Return bytes of the data
+ or None if the data is not available (e.g. before the handshake).
+ """
+ if cb_type not in CHANNEL_BINDING_TYPES:
+ raise ValueError("Unsupported channel binding type")
+ if cb_type != "tls-unique":
+ raise NotImplementedError(
+ "{0} channel binding type not implemented"
+ .format(cb_type))
+ if self._sslobj is None:
+ return None
+ return self._sslobj.tls_unique_cb()
+
def __del__(self):
# sys.stderr.write("__del__ on %s\n" % repr(self))
self._real_close()
@@ -511,7 +588,8 @@ def wrap_socket(sock, keyfile=None, certfile=None,
server_side=False, cert_reqs=CERT_NONE,
ssl_version=PROTOCOL_SSLv23, ca_certs=None,
do_handshake_on_connect=True,
- suppress_ragged_eofs=True, ciphers=None):
+ suppress_ragged_eofs=True,
+ ciphers=None):
return SSLSocket(sock=sock, keyfile=keyfile, certfile=certfile,
server_side=server_side, cert_reqs=cert_reqs,
@@ -566,9 +644,9 @@ def get_server_certificate(addr, ssl_version=PROTOCOL_SSLv3, ca_certs=None):
cert_reqs = CERT_REQUIRED
else:
cert_reqs = CERT_NONE
- s = wrap_socket(socket(), ssl_version=ssl_version,
+ s = create_connection(addr)
+ s = wrap_socket(s, ssl_version=ssl_version,
cert_reqs=cert_reqs, ca_certs=ca_certs)
- s.connect(addr)
dercert = s.getpeercert(True)
s.close()
return DER_cert_to_PEM_cert(dercert)
diff --git a/Lib/stat.py b/Lib/stat.py
index 78ccd5e..704adfe 100644
--- a/Lib/stat.py
+++ b/Lib/stat.py
@@ -19,78 +19,131 @@ ST_CTIME = 9
# Extract bits from the mode
def S_IMODE(mode):
+ """Return the portion of the file's mode that can be set by
+ os.chmod().
+ """
return mode & 0o7777
def S_IFMT(mode):
+ """Return the portion of the file's mode that describes the
+ file type.
+ """
return mode & 0o170000
# Constants used as S_IFMT() for various file types
# (not all are implemented on all systems)
-S_IFDIR = 0o040000
-S_IFCHR = 0o020000
-S_IFBLK = 0o060000
-S_IFREG = 0o100000
-S_IFIFO = 0o010000
-S_IFLNK = 0o120000
-S_IFSOCK = 0o140000
+S_IFDIR = 0o040000 # directory
+S_IFCHR = 0o020000 # character device
+S_IFBLK = 0o060000 # block device
+S_IFREG = 0o100000 # regular file
+S_IFIFO = 0o010000 # fifo (named pipe)
+S_IFLNK = 0o120000 # symbolic link
+S_IFSOCK = 0o140000 # socket file
# Functions to test for each file type
def S_ISDIR(mode):
+ """Return True if mode is from a directory."""
return S_IFMT(mode) == S_IFDIR
def S_ISCHR(mode):
+ """Return True if mode is from a character special device file."""
return S_IFMT(mode) == S_IFCHR
def S_ISBLK(mode):
+ """Return True if mode is from a block special device file."""
return S_IFMT(mode) == S_IFBLK
def S_ISREG(mode):
+ """Return True if mode is from a regular file."""
return S_IFMT(mode) == S_IFREG
def S_ISFIFO(mode):
+ """Return True if mode is from a FIFO (named pipe)."""
return S_IFMT(mode) == S_IFIFO
def S_ISLNK(mode):
+ """Return True if mode is from a symbolic link."""
return S_IFMT(mode) == S_IFLNK
def S_ISSOCK(mode):
+ """Return True if mode is from a socket."""
return S_IFMT(mode) == S_IFSOCK
# Names for permission bits
-S_ISUID = 0o4000
-S_ISGID = 0o2000
-S_ENFMT = S_ISGID
-S_ISVTX = 0o1000
-S_IREAD = 0o0400
-S_IWRITE = 0o0200
-S_IEXEC = 0o0100
-S_IRWXU = 0o0700
-S_IRUSR = 0o0400
-S_IWUSR = 0o0200
-S_IXUSR = 0o0100
-S_IRWXG = 0o0070
-S_IRGRP = 0o0040
-S_IWGRP = 0o0020
-S_IXGRP = 0o0010
-S_IRWXO = 0o0007
-S_IROTH = 0o0004
-S_IWOTH = 0o0002
-S_IXOTH = 0o0001
+S_ISUID = 0o4000 # set UID bit
+S_ISGID = 0o2000 # set GID bit
+S_ENFMT = S_ISGID # file locking enforcement
+S_ISVTX = 0o1000 # sticky bit
+S_IREAD = 0o0400 # Unix V7 synonym for S_IRUSR
+S_IWRITE = 0o0200 # Unix V7 synonym for S_IWUSR
+S_IEXEC = 0o0100 # Unix V7 synonym for S_IXUSR
+S_IRWXU = 0o0700 # mask for owner permissions
+S_IRUSR = 0o0400 # read by owner
+S_IWUSR = 0o0200 # write by owner
+S_IXUSR = 0o0100 # execute by owner
+S_IRWXG = 0o0070 # mask for group permissions
+S_IRGRP = 0o0040 # read by group
+S_IWGRP = 0o0020 # write by group
+S_IXGRP = 0o0010 # execute by group
+S_IRWXO = 0o0007 # mask for others (not in group) permissions
+S_IROTH = 0o0004 # read by others
+S_IWOTH = 0o0002 # write by others
+S_IXOTH = 0o0001 # execute by others
# Names for file flags
-UF_NODUMP = 0x00000001
-UF_IMMUTABLE = 0x00000002
-UF_APPEND = 0x00000004
-UF_OPAQUE = 0x00000008
-UF_NOUNLINK = 0x00000010
-UF_COMPRESSED = 0x00000020 # OS X: file is hfs-compressed
-UF_HIDDEN = 0x00008000 # OS X: file should not be displayed
-SF_ARCHIVED = 0x00010000
-SF_IMMUTABLE = 0x00020000
-SF_APPEND = 0x00040000
-SF_NOUNLINK = 0x00100000
-SF_SNAPSHOT = 0x00200000
+UF_NODUMP = 0x00000001 # do not dump file
+UF_IMMUTABLE = 0x00000002 # file may not be changed
+UF_APPEND = 0x00000004 # file may only be appended to
+UF_OPAQUE = 0x00000008 # directory is opaque when viewed through a union stack
+UF_NOUNLINK = 0x00000010 # file may not be renamed or deleted
+UF_COMPRESSED = 0x00000020 # OS X: file is hfs-compressed
+UF_HIDDEN = 0x00008000 # OS X: file should not be displayed
+SF_ARCHIVED = 0x00010000 # file may be archived
+SF_IMMUTABLE = 0x00020000 # file may not be changed
+SF_APPEND = 0x00040000 # file may only be appended to
+SF_NOUNLINK = 0x00100000 # file may not be renamed or deleted
+SF_SNAPSHOT = 0x00200000 # file is a snapshot file
+
+
+_filemode_table = (
+ ((S_IFLNK, "l"),
+ (S_IFREG, "-"),
+ (S_IFBLK, "b"),
+ (S_IFDIR, "d"),
+ (S_IFCHR, "c"),
+ (S_IFIFO, "p")),
+
+ ((S_IRUSR, "r"),),
+ ((S_IWUSR, "w"),),
+ ((S_IXUSR|S_ISUID, "s"),
+ (S_ISUID, "S"),
+ (S_IXUSR, "x")),
+
+ ((S_IRGRP, "r"),),
+ ((S_IWGRP, "w"),),
+ ((S_IXGRP|S_ISGID, "s"),
+ (S_ISGID, "S"),
+ (S_IXGRP, "x")),
+
+ ((S_IROTH, "r"),),
+ ((S_IWOTH, "w"),),
+ ((S_IXOTH|S_ISVTX, "t"),
+ (S_ISVTX, "T"),
+ (S_IXOTH, "x"))
+)
+
+def filemode(mode):
+ """Convert a file's mode to a string of the form '-rwxrwxrwx'."""
+ perm = []
+ for table in _filemode_table:
+ for bit, char in table:
+ if mode & bit == bit:
+ perm.append(char)
+ break
+ else:
+ perm.append("-")
+ return "".join(perm)
diff --git a/Lib/string.py b/Lib/string.py
index 0f4ede2..b57c79b 100644
--- a/Lib/string.py
+++ b/Lib/string.py
@@ -46,23 +46,7 @@ def capwords(s, sep=None):
####################################################################
import re as _re
-
-class _multimap:
- """Helper class for combining multiple mappings.
-
- Used by .{safe_,}substitute() to combine the mapping and keyword
- arguments.
- """
- def __init__(self, primary, secondary):
- self._primary = primary
- self._secondary = secondary
-
- def __getitem__(self, key):
- try:
- return self._primary[key]
- except KeyError:
- return self._secondary[key]
-
+from collections import ChainMap
class _TemplateMetaclass(type):
pattern = r"""
@@ -100,7 +84,7 @@ class Template(metaclass=_TemplateMetaclass):
def _invalid(self, mo):
i = mo.start('invalid')
- lines = self.template[:i].splitlines(True)
+ lines = self.template[:i].splitlines(keepends=True)
if not lines:
colno = 1
lineno = 1
@@ -116,7 +100,7 @@ class Template(metaclass=_TemplateMetaclass):
if not args:
mapping = kws
elif kws:
- mapping = _multimap(kws, args[0])
+ mapping = ChainMap(kws, args[0])
else:
mapping = args[0]
# Helper function for .sub()
@@ -142,7 +126,7 @@ class Template(metaclass=_TemplateMetaclass):
if not args:
mapping = kws
elif kws:
- mapping = _multimap(kws, args[0])
+ mapping = ChainMap(kws, args[0])
else:
mapping = args[0]
# Helper function for .sub()
diff --git a/Lib/subprocess.py b/Lib/subprocess.py
index 83c79ef..57cc1a4 100644
--- a/Lib/subprocess.py
+++ b/Lib/subprocess.py
@@ -191,8 +191,10 @@ should prepare for OSErrors.
A ValueError will be raised if Popen is called with invalid arguments.
-check_call() and check_output() will raise CalledProcessError, if the
-called process returns a non-zero return code.
+Exceptions defined within this module inherit from SubprocessError.
+check_call() and check_output() will raise CalledProcessError if the
+called process returns a non-zero return code. TimeoutExpired will
+be raised if a timeout was specified and expired.
Security
@@ -340,15 +342,23 @@ mswindows = (sys.platform == "win32")
import io
import os
+import time
import traceback
import gc
import signal
import builtins
import warnings
import errno
+try:
+ from time import monotonic as _time
+except ImportError:
+ from time import time as _time
# Exception classes used by this module.
-class CalledProcessError(Exception):
+class SubprocessError(Exception): pass
+
+
+class CalledProcessError(SubprocessError):
"""This exception is raised when a process run by check_call() or
check_output() returns a non-zero exit status.
The exit status will be stored in the returncode attribute;
@@ -362,10 +372,24 @@ class CalledProcessError(Exception):
return "Command '%s' returned non-zero exit status %d" % (self.cmd, self.returncode)
+class TimeoutExpired(SubprocessError):
+ """This exception is raised when the timeout expires while waiting for a
+ child process.
+ """
+ def __init__(self, cmd, timeout, output=None):
+ self.cmd = cmd
+ self.timeout = timeout
+ self.output = output
+
+ def __str__(self):
+ return ("Command '%s' timed out after %s seconds" %
+ (self.cmd, self.timeout))
+
+
if mswindows:
import threading
import msvcrt
- import _subprocess
+ import _winapi
class STARTUPINFO:
dwFlags = 0
hStdInput = None
@@ -377,53 +401,49 @@ if mswindows:
else:
import select
_has_poll = hasattr(select, 'poll')
- import fcntl
- import pickle
-
- try:
- import _posixsubprocess
- except ImportError:
- _posixsubprocess = None
- warnings.warn("The _posixsubprocess module is not being used. "
- "Child process reliability may suffer if your "
- "program uses threads.", RuntimeWarning)
+ import _posixsubprocess
+ _create_pipe = _posixsubprocess.cloexec_pipe
# When select or poll has indicated that the file is writable,
# we can write up to _PIPE_BUF bytes without risk of blocking.
# POSIX defines PIPE_BUF as >= 512.
_PIPE_BUF = getattr(select, 'PIPE_BUF', 512)
- _FD_CLOEXEC = getattr(fcntl, 'FD_CLOEXEC', 1)
-
- def _set_cloexec(fd, cloexec):
- old = fcntl.fcntl(fd, fcntl.F_GETFD)
- if cloexec:
- fcntl.fcntl(fd, fcntl.F_SETFD, old | _FD_CLOEXEC)
- else:
- fcntl.fcntl(fd, fcntl.F_SETFD, old & ~_FD_CLOEXEC)
-
- if _posixsubprocess:
- _create_pipe = _posixsubprocess.cloexec_pipe
- else:
- def _create_pipe():
- fds = os.pipe()
- _set_cloexec(fds[0], True)
- _set_cloexec(fds[1], True)
- return fds
__all__ = ["Popen", "PIPE", "STDOUT", "call", "check_call", "getstatusoutput",
- "getoutput", "check_output", "CalledProcessError"]
+ "getoutput", "check_output", "CalledProcessError", "DEVNULL"]
if mswindows:
- from _subprocess import (CREATE_NEW_CONSOLE, CREATE_NEW_PROCESS_GROUP,
- STD_INPUT_HANDLE, STD_OUTPUT_HANDLE,
- STD_ERROR_HANDLE, SW_HIDE,
- STARTF_USESTDHANDLES, STARTF_USESHOWWINDOW)
+ from _winapi import (CREATE_NEW_CONSOLE, CREATE_NEW_PROCESS_GROUP,
+ STD_INPUT_HANDLE, STD_OUTPUT_HANDLE,
+ STD_ERROR_HANDLE, SW_HIDE,
+ STARTF_USESTDHANDLES, STARTF_USESHOWWINDOW)
__all__.extend(["CREATE_NEW_CONSOLE", "CREATE_NEW_PROCESS_GROUP",
"STD_INPUT_HANDLE", "STD_OUTPUT_HANDLE",
"STD_ERROR_HANDLE", "SW_HIDE",
"STARTF_USESTDHANDLES", "STARTF_USESHOWWINDOW"])
+
+ class Handle(int):
+ closed = False
+
+ def Close(self, CloseHandle=_winapi.CloseHandle):
+ if not self.closed:
+ self.closed = True
+ CloseHandle(self)
+
+ def Detach(self):
+ if not self.closed:
+ self.closed = True
+ return int(self)
+ raise ValueError("already closed")
+
+ def __repr__(self):
+ return "Handle(%d)" % int(self)
+
+ __del__ = Close
+ __str__ = __repr__
+
try:
MAXFD = os.sysconf("SC_OPEN_MAX")
except:
@@ -448,27 +468,63 @@ def _cleanup():
PIPE = -1
STDOUT = -2
+DEVNULL = -3
def _eintr_retry_call(func, *args):
while True:
try:
return func(*args)
- except (OSError, IOError) as e:
- if e.errno == errno.EINTR:
- continue
- raise
-
-
-def call(*popenargs, **kwargs):
- """Run command with arguments. Wait for command to complete, then
- return the returncode attribute.
+ except InterruptedError:
+ continue
+
+
+# XXX This function is only used by multiprocessing and the test suite,
+# but it's here so that it can be imported when Python is compiled without
+# threads.
+
+def _args_from_interpreter_flags():
+ """Return a list of command-line arguments reproducing the current
+ settings in sys.flags and sys.warnoptions."""
+ flag_opt_map = {
+ 'debug': 'd',
+ # 'inspect': 'i',
+ # 'interactive': 'i',
+ 'optimize': 'O',
+ 'dont_write_bytecode': 'B',
+ 'no_user_site': 's',
+ 'no_site': 'S',
+ 'ignore_environment': 'E',
+ 'verbose': 'v',
+ 'bytes_warning': 'b',
+ 'quiet': 'q',
+ 'hash_randomization': 'R',
+ }
+ args = []
+ for flag, opt in flag_opt_map.items():
+ v = getattr(sys.flags, flag)
+ if v > 0:
+ args.append('-' + opt * v)
+ for opt in sys.warnoptions:
+ args.append('-W' + opt)
+ return args
+
+
+def call(*popenargs, timeout=None, **kwargs):
+ """Run command with arguments. Wait for command to complete or
+ timeout, then return the returncode attribute.
The arguments are the same as for the Popen constructor. Example:
retcode = call(["ls", "-l"])
"""
- return Popen(*popenargs, **kwargs).wait()
+ with Popen(*popenargs, **kwargs) as p:
+ try:
+ return p.wait(timeout=timeout)
+ except:
+ p.kill()
+ p.wait()
+ raise
def check_call(*popenargs, **kwargs):
@@ -477,7 +533,7 @@ def check_call(*popenargs, **kwargs):
CalledProcessError. The CalledProcessError object will have the
return code in the returncode attribute.
- The arguments are the same as for the Popen constructor. Example:
+ The arguments are the same as for the call function. Example:
check_call(["ls", "-l"])
"""
@@ -490,7 +546,7 @@ def check_call(*popenargs, **kwargs):
return 0
-def check_output(*popenargs, **kwargs):
+def check_output(*popenargs, timeout=None, **kwargs):
r"""Run command with arguments and return its output as a byte string.
If the exit code was non-zero it raises a CalledProcessError. The
@@ -512,14 +568,20 @@ def check_output(*popenargs, **kwargs):
"""
if 'stdout' in kwargs:
raise ValueError('stdout argument not allowed, it will be overridden.')
- process = Popen(*popenargs, stdout=PIPE, **kwargs)
- output, unused_err = process.communicate()
- retcode = process.poll()
- if retcode:
- cmd = kwargs.get("args")
- if cmd is None:
- cmd = popenargs[0]
- raise CalledProcessError(retcode, cmd, output=output)
+ with Popen(*popenargs, stdout=PIPE, **kwargs) as process:
+ try:
+ output, unused_err = process.communicate(timeout=timeout)
+ except TimeoutExpired:
+ process.kill()
+ output, unused_err = process.communicate()
+ raise TimeoutExpired(process.args, timeout, output=output)
+ except:
+ process.kill()
+ process.wait()
+ raise
+ retcode = process.poll()
+ if retcode:
+ raise CalledProcessError(retcode, process.args, output=output)
return output
@@ -614,11 +676,19 @@ def getstatusoutput(cmd):
>>> subprocess.getstatusoutput('/bin/junk')
(256, 'sh: /bin/junk: not found')
"""
- pipe = os.popen('{ ' + cmd + '; } 2>&1', 'r')
- text = pipe.read()
- sts = pipe.close()
- if sts is None: sts = 0
- if text[-1:] == '\n': text = text[:-1]
+ with os.popen('{ ' + cmd + '; } 2>&1', 'r') as pipe:
+ try:
+ text = pipe.read()
+ sts = pipe.close()
+ except:
+ process = pipe._proc
+ process.kill()
+ process.wait()
+ raise
+ if sts is None:
+ sts = 0
+ if text[-1:] == '\n':
+ text = text[:-1]
return sts, text
@@ -650,6 +720,8 @@ class Popen(object):
_cleanup()
self._child_created = False
+ self._input = None
+ self._communication_started = False
if bufsize is None:
bufsize = 0 # Restore default
if not isinstance(bufsize, int):
@@ -684,6 +756,7 @@ class Popen(object):
raise ValueError("creationflags is only supported on Windows "
"platforms")
+ self.args = args
self.stdin = None
self.stdout = None
self.stderr = None
@@ -724,7 +797,7 @@ class Popen(object):
if p2cwrite != -1:
self.stdin = io.open(p2cwrite, 'wb', bufsize)
- if self.universal_newlines:
+ if universal_newlines:
self.stdin = io.TextIOWrapper(self.stdin, write_through=True)
if c2pread != -1:
self.stdout = io.open(c2pread, 'rb', bufsize)
@@ -737,7 +810,7 @@ class Popen(object):
try:
self._execute_child(args, executable, preexec_fn, close_fds,
- pass_fds, cwd, env, universal_newlines,
+ pass_fds, cwd, env,
startupinfo, creationflags, shell,
p2cread, p2cwrite,
c2pread, c2pwrite,
@@ -784,19 +857,28 @@ class Popen(object):
# Child is still running, keep us alive until we can wait on it.
_active.append(self)
+ def _get_devnull(self):
+ if not hasattr(self, '_devnull'):
+ self._devnull = os.open(os.devnull, os.O_RDWR)
+ return self._devnull
- def communicate(self, input=None):
+ def communicate(self, input=None, timeout=None):
"""Interact with process: Send data to stdin. Read data from
stdout and stderr, until end-of-file is reached. Wait for
- process to terminate. The optional input argument should be a
- string to be sent to the child process, or None, if no data
+ process to terminate. The optional input argument should be
+ bytes to be sent to the child process, or None, if no data
should be sent to the child.
communicate() returns a tuple (stdout, stderr)."""
- # Optimization: If we are only using one pipe, or no pipe at
- # all, using select() or threads is unnecessary.
- if [self.stdin, self.stdout, self.stderr].count(None) >= 2:
+ if self._communication_started and input:
+ raise ValueError("Cannot send input after starting communication")
+
+ # Optimization: If we are not worried about timeouts, we haven't
+ # started communicating, and we have one or zero pipes, using select()
+ # or threads is unnecessary.
+ if (timeout is None and not self._communication_started and
+ [self.stdin, self.stdout, self.stderr].count(None) >= 2):
stdout = None
stderr = None
if self.stdin:
@@ -814,15 +896,42 @@ class Popen(object):
stderr = _eintr_retry_call(self.stderr.read)
self.stderr.close()
self.wait()
- return (stdout, stderr)
+ else:
+ if timeout is not None:
+ endtime = _time() + timeout
+ else:
+ endtime = None
- return self._communicate(input)
+ try:
+ stdout, stderr = self._communicate(input, endtime, timeout)
+ finally:
+ self._communication_started = True
+
+ sts = self.wait(timeout=self._remaining_time(endtime))
+
+ return (stdout, stderr)
def poll(self):
return self._internal_poll()
+ def _remaining_time(self, endtime):
+ """Convenience for _communicate when computing timeouts."""
+ if endtime is None:
+ return None
+ else:
+ return endtime - _time()
+
+
+ def _check_timeout(self, endtime, orig_timeout):
+ """Convenience for checking if a timeout has expired."""
+ if endtime is None:
+ return
+ if _time() > endtime:
+ raise TimeoutExpired(self.args, orig_timeout)
+
+
if mswindows:
#
# Windows methods
@@ -839,11 +948,16 @@ class Popen(object):
errread, errwrite = -1, -1
if stdin is None:
- p2cread = _subprocess.GetStdHandle(_subprocess.STD_INPUT_HANDLE)
+ p2cread = _winapi.GetStdHandle(_winapi.STD_INPUT_HANDLE)
if p2cread is None:
- p2cread, _ = _subprocess.CreatePipe(None, 0)
+ p2cread, _ = _winapi.CreatePipe(None, 0)
+ p2cread = Handle(p2cread)
+ _winapi.CloseHandle(_)
elif stdin == PIPE:
- p2cread, p2cwrite = _subprocess.CreatePipe(None, 0)
+ p2cread, p2cwrite = _winapi.CreatePipe(None, 0)
+ p2cread, p2cwrite = Handle(p2cread), Handle(p2cwrite)
+ elif stdin == DEVNULL:
+ p2cread = msvcrt.get_osfhandle(self._get_devnull())
elif isinstance(stdin, int):
p2cread = msvcrt.get_osfhandle(stdin)
else:
@@ -852,11 +966,16 @@ class Popen(object):
p2cread = self._make_inheritable(p2cread)
if stdout is None:
- c2pwrite = _subprocess.GetStdHandle(_subprocess.STD_OUTPUT_HANDLE)
+ c2pwrite = _winapi.GetStdHandle(_winapi.STD_OUTPUT_HANDLE)
if c2pwrite is None:
- _, c2pwrite = _subprocess.CreatePipe(None, 0)
+ _, c2pwrite = _winapi.CreatePipe(None, 0)
+ c2pwrite = Handle(c2pwrite)
+ _winapi.CloseHandle(_)
elif stdout == PIPE:
- c2pread, c2pwrite = _subprocess.CreatePipe(None, 0)
+ c2pread, c2pwrite = _winapi.CreatePipe(None, 0)
+ c2pread, c2pwrite = Handle(c2pread), Handle(c2pwrite)
+ elif stdout == DEVNULL:
+ c2pwrite = msvcrt.get_osfhandle(self._get_devnull())
elif isinstance(stdout, int):
c2pwrite = msvcrt.get_osfhandle(stdout)
else:
@@ -865,13 +984,18 @@ class Popen(object):
c2pwrite = self._make_inheritable(c2pwrite)
if stderr is None:
- errwrite = _subprocess.GetStdHandle(_subprocess.STD_ERROR_HANDLE)
+ errwrite = _winapi.GetStdHandle(_winapi.STD_ERROR_HANDLE)
if errwrite is None:
- _, errwrite = _subprocess.CreatePipe(None, 0)
+ _, errwrite = _winapi.CreatePipe(None, 0)
+ errwrite = Handle(errwrite)
+ _winapi.CloseHandle(_)
elif stderr == PIPE:
- errread, errwrite = _subprocess.CreatePipe(None, 0)
+ errread, errwrite = _winapi.CreatePipe(None, 0)
+ errread, errwrite = Handle(errread), Handle(errwrite)
elif stderr == STDOUT:
errwrite = c2pwrite
+ elif stderr == DEVNULL:
+ errwrite = msvcrt.get_osfhandle(self._get_devnull())
elif isinstance(stderr, int):
errwrite = msvcrt.get_osfhandle(stderr)
else:
@@ -886,20 +1010,22 @@ class Popen(object):
def _make_inheritable(self, handle):
"""Return a duplicate of handle, which is inheritable"""
- return _subprocess.DuplicateHandle(_subprocess.GetCurrentProcess(),
- handle, _subprocess.GetCurrentProcess(), 0, 1,
- _subprocess.DUPLICATE_SAME_ACCESS)
+ h = _winapi.DuplicateHandle(
+ _winapi.GetCurrentProcess(), handle,
+ _winapi.GetCurrentProcess(), 0, 1,
+ _winapi.DUPLICATE_SAME_ACCESS)
+ return Handle(h)
def _find_w9xpopen(self):
"""Find and return absolut path to w9xpopen.exe"""
w9xpopen = os.path.join(
- os.path.dirname(_subprocess.GetModuleFileName(0)),
+ os.path.dirname(_winapi.GetModuleFileName(0)),
"w9xpopen.exe")
if not os.path.exists(w9xpopen):
# Eeek - file-not-found - possibly an embedding
# situation - see if we can locate it in sys.exec_prefix
- w9xpopen = os.path.join(os.path.dirname(sys.exec_prefix),
+ w9xpopen = os.path.join(os.path.dirname(sys.base_exec_prefix),
"w9xpopen.exe")
if not os.path.exists(w9xpopen):
raise RuntimeError("Cannot locate w9xpopen.exe, which is "
@@ -909,7 +1035,7 @@ class Popen(object):
def _execute_child(self, args, executable, preexec_fn, close_fds,
- pass_fds, cwd, env, universal_newlines,
+ pass_fds, cwd, env,
startupinfo, creationflags, shell,
p2cread, p2cwrite,
c2pread, c2pwrite,
@@ -926,17 +1052,17 @@ class Popen(object):
if startupinfo is None:
startupinfo = STARTUPINFO()
if -1 not in (p2cread, c2pwrite, errwrite):
- startupinfo.dwFlags |= _subprocess.STARTF_USESTDHANDLES
+ startupinfo.dwFlags |= _winapi.STARTF_USESTDHANDLES
startupinfo.hStdInput = p2cread
startupinfo.hStdOutput = c2pwrite
startupinfo.hStdError = errwrite
if shell:
- startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW
- startupinfo.wShowWindow = _subprocess.SW_HIDE
+ startupinfo.dwFlags |= _winapi.STARTF_USESHOWWINDOW
+ startupinfo.wShowWindow = _winapi.SW_HIDE
comspec = os.environ.get("COMSPEC", "cmd.exe")
args = '{} /c "{}"'.format (comspec, args)
- if (_subprocess.GetVersion() >= 0x80000000 or
+ if (_winapi.GetVersion() >= 0x80000000 or
os.path.basename(comspec).lower() == "command.com"):
# Win9x, or using command.com on NT. We need to
# use the w9xpopen intermediate program. For more
@@ -950,11 +1076,11 @@ class Popen(object):
# use at xxx" and a hopeful warning about the
# stability of your system. Cost is Ctrl+C won't
# kill children.
- creationflags |= _subprocess.CREATE_NEW_CONSOLE
+ creationflags |= _winapi.CREATE_NEW_CONSOLE
# Start the process
try:
- hp, ht, pid, tid = _subprocess.CreateProcess(executable, args,
+ hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
# no special security
None, None,
int(not close_fds),
@@ -981,17 +1107,19 @@ class Popen(object):
c2pwrite.Close()
if errwrite != -1:
errwrite.Close()
+ if hasattr(self, '_devnull'):
+ os.close(self._devnull)
# Retain the process handle, but close the thread handle
self._child_created = True
- self._handle = hp
+ self._handle = Handle(hp)
self.pid = pid
- ht.Close()
+ _winapi.CloseHandle(ht)
def _internal_poll(self, _deadstate=None,
- _WaitForSingleObject=_subprocess.WaitForSingleObject,
- _WAIT_OBJECT_0=_subprocess.WAIT_OBJECT_0,
- _GetExitCodeProcess=_subprocess.GetExitCodeProcess):
+ _WaitForSingleObject=_winapi.WaitForSingleObject,
+ _WAIT_OBJECT_0=_winapi.WAIT_OBJECT_0,
+ _GetExitCodeProcess=_winapi.GetExitCodeProcess):
"""Check if child process has terminated. Returns returncode
attribute.
@@ -1005,13 +1133,21 @@ class Popen(object):
return self.returncode
- def wait(self):
+ def wait(self, timeout=None, endtime=None):
"""Wait for child process to terminate. Returns returncode
attribute."""
+ if endtime is not None:
+ timeout = self._remaining_time(endtime)
+ if timeout is None:
+ timeout_millis = _winapi.INFINITE
+ else:
+ timeout_millis = int(timeout * 1000)
if self.returncode is None:
- _subprocess.WaitForSingleObject(self._handle,
- _subprocess.INFINITE)
- self.returncode = _subprocess.GetExitCodeProcess(self._handle)
+ result = _winapi.WaitForSingleObject(self._handle,
+ timeout_millis)
+ if result == _winapi.WAIT_TIMEOUT:
+ raise TimeoutExpired(self.args, timeout)
+ self.returncode = _winapi.GetExitCodeProcess(self._handle)
return self.returncode
@@ -1020,22 +1156,23 @@ class Popen(object):
fh.close()
- def _communicate(self, input):
- stdout = None # Return
- stderr = None # Return
-
- if self.stdout:
- stdout = []
- stdout_thread = threading.Thread(target=self._readerthread,
- args=(self.stdout, stdout))
- stdout_thread.daemon = True
- stdout_thread.start()
- if self.stderr:
- stderr = []
- stderr_thread = threading.Thread(target=self._readerthread,
- args=(self.stderr, stderr))
- stderr_thread.daemon = True
- stderr_thread.start()
+ def _communicate(self, input, endtime, orig_timeout):
+ # Start reader threads feeding into a list hanging off of this
+ # object, unless they've already been started.
+ if self.stdout and not hasattr(self, "_stdout_buff"):
+ self._stdout_buff = []
+ self.stdout_thread = \
+ threading.Thread(target=self._readerthread,
+ args=(self.stdout, self._stdout_buff))
+ self.stdout_thread.daemon = True
+ self.stdout_thread.start()
+ if self.stderr and not hasattr(self, "_stderr_buff"):
+ self._stderr_buff = []
+ self.stderr_thread = \
+ threading.Thread(target=self._readerthread,
+ args=(self.stderr, self._stderr_buff))
+ self.stderr_thread.daemon = True
+ self.stderr_thread.start()
if self.stdin:
if input is not None:
@@ -1046,10 +1183,28 @@ class Popen(object):
raise
self.stdin.close()
+ # Wait for the reader threads, or time out. If we time out, the
+ # threads remain reading and the fds left open in case the user
+ # calls communicate again.
+ if self.stdout is not None:
+ self.stdout_thread.join(self._remaining_time(endtime))
+ if self.stdout_thread.is_alive():
+ raise TimeoutExpired(self.args, orig_timeout)
+ if self.stderr is not None:
+ self.stderr_thread.join(self._remaining_time(endtime))
+ if self.stderr_thread.is_alive():
+ raise TimeoutExpired(self.args, orig_timeout)
+
+ # Collect the output from and close both pipes, now that we know
+ # both have been read successfully.
+ stdout = None
+ stderr = None
if self.stdout:
- stdout_thread.join()
+ stdout = self._stdout_buff
+ self.stdout.close()
if self.stderr:
- stderr_thread.join()
+ stderr = self._stderr_buff
+ self.stderr.close()
# All data exchanged. Translate lists into strings.
if stdout is not None:
@@ -1057,7 +1212,6 @@ class Popen(object):
if stderr is not None:
stderr = stderr[0]
- self.wait()
return (stdout, stderr)
def send_signal(self, sig):
@@ -1076,14 +1230,12 @@ class Popen(object):
"""Terminates the process
"""
try:
- _subprocess.TerminateProcess(self._handle, 1)
- except OSError as e:
+ _winapi.TerminateProcess(self._handle, 1)
+ except PermissionError:
# ERROR_ACCESS_DENIED (winerror 5) is received when the
# process already died.
- if e.winerror != 5:
- raise
- rc = _subprocess.GetExitCodeProcess(self._handle)
- if rc == _subprocess.STILL_ACTIVE:
+ rc = _winapi.GetExitCodeProcess(self._handle)
+ if rc == _winapi.STILL_ACTIVE:
raise
self.returncode = rc
@@ -1105,6 +1257,8 @@ class Popen(object):
pass
elif stdin == PIPE:
p2cread, p2cwrite = _create_pipe()
+ elif stdin == DEVNULL:
+ p2cread = self._get_devnull()
elif isinstance(stdin, int):
p2cread = stdin
else:
@@ -1115,6 +1269,8 @@ class Popen(object):
pass
elif stdout == PIPE:
c2pread, c2pwrite = _create_pipe()
+ elif stdout == DEVNULL:
+ c2pwrite = self._get_devnull()
elif isinstance(stdout, int):
c2pwrite = stdout
else:
@@ -1127,6 +1283,8 @@ class Popen(object):
errread, errwrite = _create_pipe()
elif stderr == STDOUT:
errwrite = c2pwrite
+ elif stderr == DEVNULL:
+ errwrite = self._get_devnull()
elif isinstance(stderr, int):
errwrite = stderr
else:
@@ -1149,7 +1307,7 @@ class Popen(object):
def _execute_child(self, args, executable, preexec_fn, close_fds,
- pass_fds, cwd, env, universal_newlines,
+ pass_fds, cwd, env,
startupinfo, creationflags, shell,
p2cread, p2cwrite,
c2pread, c2pwrite,
@@ -1157,7 +1315,7 @@ class Popen(object):
restore_signals, start_new_session):
"""Execute program (POSIX version)"""
- if isinstance(args, str):
+ if isinstance(args, (str, bytes)):
args = [args]
else:
args = list(args)
@@ -1177,153 +1335,34 @@ class Popen(object):
errpipe_read, errpipe_write = _create_pipe()
try:
try:
-
- if _posixsubprocess:
- # We must avoid complex work that could involve
- # malloc or free in the child process to avoid
- # potential deadlocks, thus we do all this here.
- # and pass it to fork_exec()
-
- if env is not None:
- env_list = [os.fsencode(k) + b'=' + os.fsencode(v)
- for k, v in env.items()]
- else:
- env_list = None # Use execv instead of execve.
- executable = os.fsencode(executable)
- if os.path.dirname(executable):
- executable_list = (executable,)
- else:
- # This matches the behavior of os._execvpe().
- executable_list = tuple(
- os.path.join(os.fsencode(dir), executable)
- for dir in os.get_exec_path(env))
- fds_to_keep = set(pass_fds)
- fds_to_keep.add(errpipe_write)
- self.pid = _posixsubprocess.fork_exec(
- args, executable_list,
- close_fds, sorted(fds_to_keep), cwd, env_list,
- p2cread, p2cwrite, c2pread, c2pwrite,
- errread, errwrite,
- errpipe_read, errpipe_write,
- restore_signals, start_new_session, preexec_fn)
- self._child_created = True
+ # We must avoid complex work that could involve
+ # malloc or free in the child process to avoid
+ # potential deadlocks, thus we do all this here.
+ # and pass it to fork_exec()
+
+ if env is not None:
+ env_list = [os.fsencode(k) + b'=' + os.fsencode(v)
+ for k, v in env.items()]
else:
- # Pure Python implementation: It is not thread safe.
- # This implementation may deadlock in the child if your
- # parent process has any other threads running.
-
- gc_was_enabled = gc.isenabled()
- # Disable gc to avoid bug where gc -> file_dealloc ->
- # write to stderr -> hang. See issue1336
- gc.disable()
- try:
- self.pid = os.fork()
- except:
- if gc_was_enabled:
- gc.enable()
- raise
- self._child_created = True
- if self.pid == 0:
- # Child
- reached_preexec = False
- try:
- # Close parent's pipe ends
- if p2cwrite != -1:
- os.close(p2cwrite)
- if c2pread != -1:
- os.close(c2pread)
- if errread != -1:
- os.close(errread)
- os.close(errpipe_read)
-
- # When duping fds, if there arises a situation
- # where one of the fds is either 0, 1 or 2, it
- # is possible that it is overwritten (#12607).
- if c2pwrite == 0:
- c2pwrite = os.dup(c2pwrite)
- if errwrite == 0 or errwrite == 1:
- errwrite = os.dup(errwrite)
-
- # Dup fds for child
- def _dup2(a, b):
- # dup2() removes the CLOEXEC flag but
- # we must do it ourselves if dup2()
- # would be a no-op (issue #10806).
- if a == b:
- _set_cloexec(a, False)
- elif a != -1:
- os.dup2(a, b)
- _dup2(p2cread, 0)
- _dup2(c2pwrite, 1)
- _dup2(errwrite, 2)
-
- # Close pipe fds. Make sure we don't close the
- # same fd more than once, or standard fds.
- closed = set()
- for fd in [p2cread, c2pwrite, errwrite]:
- if fd > 2 and fd not in closed:
- os.close(fd)
- closed.add(fd)
-
- # Close all other fds, if asked for
- if close_fds:
- fds_to_keep = set(pass_fds)
- fds_to_keep.add(errpipe_write)
- self._close_fds(fds_to_keep)
-
-
- if cwd is not None:
- os.chdir(cwd)
-
- # This is a copy of Python/pythonrun.c
- # _Py_RestoreSignals(). If that were exposed
- # as a sys._py_restoresignals func it would be
- # better.. but this pure python implementation
- # isn't likely to be used much anymore.
- if restore_signals:
- signals = ('SIGPIPE', 'SIGXFZ', 'SIGXFSZ')
- for sig in signals:
- if hasattr(signal, sig):
- signal.signal(getattr(signal, sig),
- signal.SIG_DFL)
-
- if start_new_session and hasattr(os, 'setsid'):
- os.setsid()
-
- reached_preexec = True
- if preexec_fn:
- preexec_fn()
-
- if env is None:
- os.execvp(executable, args)
- else:
- os.execvpe(executable, args, env)
-
- except:
- try:
- exc_type, exc_value = sys.exc_info()[:2]
- if isinstance(exc_value, OSError):
- errno_num = exc_value.errno
- else:
- errno_num = 0
- if not reached_preexec:
- exc_value = "noexec"
- message = '%s:%x:%s' % (exc_type.__name__,
- errno_num, exc_value)
- message = message.encode(errors="surrogatepass")
- os.write(errpipe_write, message)
- except Exception:
- # We MUST not allow anything odd happening
- # above to prevent us from exiting below.
- pass
-
- # This exitcode won't be reported to applications
- # so it really doesn't matter what we return.
- os._exit(255)
-
- # Parent
- if gc_was_enabled:
- gc.enable()
+ env_list = None # Use execv instead of execve.
+ executable = os.fsencode(executable)
+ if os.path.dirname(executable):
+ executable_list = (executable,)
+ else:
+ # This matches the behavior of os._execvpe().
+ executable_list = tuple(
+ os.path.join(os.fsencode(dir), executable)
+ for dir in os.get_exec_path(env))
+ fds_to_keep = set(pass_fds)
+ fds_to_keep.add(errpipe_write)
+ self.pid = _posixsubprocess.fork_exec(
+ args, executable_list,
+ close_fds, sorted(fds_to_keep), cwd, env_list,
+ p2cread, p2cwrite, c2pread, c2pwrite,
+ errread, errwrite,
+ errpipe_read, errpipe_write,
+ restore_signals, start_new_session, preexec_fn)
+ self._child_created = True
finally:
# be sure the FD is closed no matter what
os.close(errpipe_write)
@@ -1334,6 +1373,8 @@ class Popen(object):
os.close(c2pwrite)
if errwrite != -1 and errread != -1:
os.close(errwrite)
+ if hasattr(self, '_devnull'):
+ os.close(self._devnull)
# Wait for exec to fail or succeed; possibly raising an
# exception (limited in size)
@@ -1425,25 +1466,57 @@ class Popen(object):
return self.returncode
- def wait(self):
+ def _try_wait(self, wait_flags):
+ try:
+ (pid, sts) = _eintr_retry_call(os.waitpid, self.pid, wait_flags)
+ except OSError as e:
+ if e.errno != errno.ECHILD:
+ raise
+ # This happens if SIGCLD is set to be ignored or waiting
+ # for child processes has otherwise been disabled for our
+ # process. This child is dead, we can't get the status.
+ pid = self.pid
+ sts = 0
+ return (pid, sts)
+
+
+ def wait(self, timeout=None, endtime=None):
"""Wait for child process to terminate. Returns returncode
attribute."""
- if self.returncode is None:
- try:
- pid, sts = _eintr_retry_call(os.waitpid, self.pid, 0)
- except OSError as e:
- if e.errno != errno.ECHILD:
- raise
- # This happens if SIGCLD is set to be ignored or waiting
- # for child processes has otherwise been disabled for our
- # process. This child is dead, we can't get the status.
- sts = 0
+ if self.returncode is not None:
+ return self.returncode
+
+ # endtime is preferred to timeout. timeout is only used for
+ # printing.
+ if endtime is not None or timeout is not None:
+ if endtime is None:
+ endtime = _time() + timeout
+ elif timeout is None:
+ timeout = self._remaining_time(endtime)
+
+ if endtime is not None:
+ # Enter a busy loop if we have a timeout. This busy loop was
+ # cribbed from Lib/threading.py in Thread.wait() at r71065.
+ delay = 0.0005 # 500 us -> initial delay of 1 ms
+ while True:
+ (pid, sts) = self._try_wait(os.WNOHANG)
+ assert pid == self.pid or pid == 0
+ if pid == self.pid:
+ self._handle_exitstatus(sts)
+ break
+ remaining = self._remaining_time(endtime)
+ if remaining <= 0:
+ raise TimeoutExpired(self.args, timeout)
+ delay = min(delay * 2, remaining, .05)
+ time.sleep(delay)
+ elif self.returncode is None:
+ (pid, sts) = self._try_wait(0)
self._handle_exitstatus(sts)
return self.returncode
- def _communicate(self, input):
- if self.stdin:
+ def _communicate(self, input, endtime, orig_timeout):
+ if self.stdin and not self._communication_started:
# Flush stdio buffer. This might block, if the user has
# been writing to .stdin in an uncontrolled fashion.
self.stdin.flush()
@@ -1451,9 +1524,13 @@ class Popen(object):
self.stdin.close()
if _has_poll:
- stdout, stderr = self._communicate_with_poll(input)
+ stdout, stderr = self._communicate_with_poll(input, endtime,
+ orig_timeout)
else:
- stdout, stderr = self._communicate_with_select(input)
+ stdout, stderr = self._communicate_with_select(input, endtime,
+ orig_timeout)
+
+ self.wait(timeout=self._remaining_time(endtime))
# All data exchanged. Translate lists into strings.
if stdout is not None:
@@ -1471,67 +1548,92 @@ class Popen(object):
stderr = self._translate_newlines(stderr,
self.stderr.encoding)
- self.wait()
return (stdout, stderr)
- def _communicate_with_poll(self, input):
+ def _save_input(self, input):
+ # This method is called from the _communicate_with_*() methods
+ # so that if we time out while communicating, we can continue
+ # sending input if we retry.
+ if self.stdin and self._input is None:
+ self._input_offset = 0
+ self._input = input
+ if self.universal_newlines and input is not None:
+ self._input = self._input.encode(self.stdin.encoding)
+
+
+ def _communicate_with_poll(self, input, endtime, orig_timeout):
stdout = None # Return
stderr = None # Return
- fd2file = {}
- fd2output = {}
+
+ if not self._communication_started:
+ self._fd2file = {}
poller = select.poll()
def register_and_append(file_obj, eventmask):
poller.register(file_obj.fileno(), eventmask)
- fd2file[file_obj.fileno()] = file_obj
+ self._fd2file[file_obj.fileno()] = file_obj
def close_unregister_and_remove(fd):
poller.unregister(fd)
- fd2file[fd].close()
- fd2file.pop(fd)
+ self._fd2file[fd].close()
+ self._fd2file.pop(fd)
if self.stdin and input:
register_and_append(self.stdin, select.POLLOUT)
+ # Only create this mapping if we haven't already.
+ if not self._communication_started:
+ self._fd2output = {}
+ if self.stdout:
+ self._fd2output[self.stdout.fileno()] = []
+ if self.stderr:
+ self._fd2output[self.stderr.fileno()] = []
+
select_POLLIN_POLLPRI = select.POLLIN | select.POLLPRI
if self.stdout:
register_and_append(self.stdout, select_POLLIN_POLLPRI)
- fd2output[self.stdout.fileno()] = stdout = []
+ stdout = self._fd2output[self.stdout.fileno()]
if self.stderr:
register_and_append(self.stderr, select_POLLIN_POLLPRI)
- fd2output[self.stderr.fileno()] = stderr = []
+ stderr = self._fd2output[self.stderr.fileno()]
- input_offset = 0
- while fd2file:
+ self._save_input(input)
+
+ while self._fd2file:
+ timeout = self._remaining_time(endtime)
+ if timeout is not None and timeout < 0:
+ raise TimeoutExpired(self.args, orig_timeout)
try:
- ready = poller.poll()
+ ready = poller.poll(timeout)
except select.error as e:
if e.args[0] == errno.EINTR:
continue
raise
+ self._check_timeout(endtime, orig_timeout)
# XXX Rewrite these to use non-blocking I/O on the
# file objects; they are no longer using C stdio!
for fd, mode in ready:
if mode & select.POLLOUT:
- chunk = input[input_offset : input_offset + _PIPE_BUF]
+ chunk = self._input[self._input_offset :
+ self._input_offset + _PIPE_BUF]
try:
- input_offset += os.write(fd, chunk)
+ self._input_offset += os.write(fd, chunk)
except OSError as e:
if e.errno == errno.EPIPE:
close_unregister_and_remove(fd)
else:
raise
else:
- if input_offset >= len(input):
+ if self._input_offset >= len(self._input):
close_unregister_and_remove(fd)
elif mode & select_POLLIN_POLLPRI:
data = os.read(fd, 4096)
if not data:
close_unregister_and_remove(fd)
- fd2output[fd].append(data)
+ self._fd2output[fd].append(data)
else:
# Ignore hang up or errors.
close_unregister_and_remove(fd)
@@ -1539,61 +1641,83 @@ class Popen(object):
return (stdout, stderr)
- def _communicate_with_select(self, input):
- read_set = []
- write_set = []
+ def _communicate_with_select(self, input, endtime, orig_timeout):
+ if not self._communication_started:
+ self._read_set = []
+ self._write_set = []
+ if self.stdin and input:
+ self._write_set.append(self.stdin)
+ if self.stdout:
+ self._read_set.append(self.stdout)
+ if self.stderr:
+ self._read_set.append(self.stderr)
+
+ self._save_input(input)
+
stdout = None # Return
stderr = None # Return
- if self.stdin and input:
- write_set.append(self.stdin)
if self.stdout:
- read_set.append(self.stdout)
- stdout = []
+ if not self._communication_started:
+ self._stdout_buff = []
+ stdout = self._stdout_buff
if self.stderr:
- read_set.append(self.stderr)
- stderr = []
-
- input_offset = 0
- while read_set or write_set:
+ if not self._communication_started:
+ self._stderr_buff = []
+ stderr = self._stderr_buff
+
+ while self._read_set or self._write_set:
+ timeout = self._remaining_time(endtime)
+ if timeout is not None and timeout < 0:
+ raise TimeoutExpired(self.args, orig_timeout)
try:
- rlist, wlist, xlist = select.select(read_set, write_set, [])
+ (rlist, wlist, xlist) = \
+ select.select(self._read_set, self._write_set, [],
+ timeout)
except select.error as e:
if e.args[0] == errno.EINTR:
continue
raise
+ # According to the docs, returning three empty lists indicates
+ # that the timeout expired.
+ if not (rlist or wlist or xlist):
+ raise TimeoutExpired(self.args, orig_timeout)
+ # We also check what time it is ourselves for good measure.
+ self._check_timeout(endtime, orig_timeout)
+
# XXX Rewrite these to use non-blocking I/O on the
# file objects; they are no longer using C stdio!
if self.stdin in wlist:
- chunk = input[input_offset : input_offset + _PIPE_BUF]
+ chunk = self._input[self._input_offset :
+ self._input_offset + _PIPE_BUF]
try:
bytes_written = os.write(self.stdin.fileno(), chunk)
except OSError as e:
if e.errno == errno.EPIPE:
self.stdin.close()
- write_set.remove(self.stdin)
+ self._write_set.remove(self.stdin)
else:
raise
else:
- input_offset += bytes_written
- if input_offset >= len(input):
+ self._input_offset += bytes_written
+ if self._input_offset >= len(self._input):
self.stdin.close()
- write_set.remove(self.stdin)
+ self._write_set.remove(self.stdin)
if self.stdout in rlist:
data = os.read(self.stdout.fileno(), 1024)
if not data:
self.stdout.close()
- read_set.remove(self.stdout)
+ self._read_set.remove(self.stdout)
stdout.append(data)
if self.stderr in rlist:
data = os.read(self.stderr.fileno(), 1024)
if not data:
self.stderr.close()
- read_set.remove(self.stderr)
+ self._read_set.remove(self.stderr)
stderr.append(data)
return (stdout, stderr)
@@ -1613,68 +1737,3 @@ class Popen(object):
"""Kill the process with SIGKILL
"""
self.send_signal(signal.SIGKILL)
-
-
-def _demo_posix():
- #
- # Example 1: Simple redirection: Get process list
- #
- plist = Popen(["ps"], stdout=PIPE).communicate()[0]
- print("Process list:")
- print(plist)
-
- #
- # Example 2: Change uid before executing child
- #
- if os.getuid() == 0:
- p = Popen(["id"], preexec_fn=lambda: os.setuid(100))
- p.wait()
-
- #
- # Example 3: Connecting several subprocesses
- #
- print("Looking for 'hda'...")
- p1 = Popen(["dmesg"], stdout=PIPE)
- p2 = Popen(["grep", "hda"], stdin=p1.stdout, stdout=PIPE)
- print(repr(p2.communicate()[0]))
-
- #
- # Example 4: Catch execution error
- #
- print()
- print("Trying a weird file...")
- try:
- print(Popen(["/this/path/does/not/exist"]).communicate())
- except OSError as e:
- if e.errno == errno.ENOENT:
- print("The file didn't exist. I thought so...")
- print("Child traceback:")
- print(e.child_traceback)
- else:
- print("Error", e.errno)
- else:
- print("Gosh. No error.", file=sys.stderr)
-
-
-def _demo_windows():
- #
- # Example 1: Connecting several subprocesses
- #
- print("Looking for 'PROMPT' in set output...")
- p1 = Popen("set", stdout=PIPE, shell=True)
- p2 = Popen('find "PROMPT"', stdin=p1.stdout, stdout=PIPE)
- print(repr(p2.communicate()[0]))
-
- #
- # Example 2: Simple execution of program
- #
- print("Executing calc...")
- p = Popen("calc")
- p.wait()
-
-
-if __name__ == "__main__":
- if mswindows:
- _demo_windows()
- else:
- _demo_posix()
diff --git a/Lib/sysconfig.py b/Lib/sysconfig.py
index b2183d8..71da1db 100644
--- a/Lib/sysconfig.py
+++ b/Lib/sysconfig.py
@@ -1,8 +1,8 @@
-"""Provide access to Python's configuration information.
+"""Access to Python's configuration information."""
-"""
-import sys
import os
+import re
+import sys
from os.path import pardir, realpath
__all__ = [
@@ -17,50 +17,50 @@ __all__ = [
'get_python_version',
'get_scheme_names',
'parse_config_h',
- ]
+]
_INSTALL_SCHEMES = {
'posix_prefix': {
- 'stdlib': '{base}/lib/python{py_version_short}',
+ 'stdlib': '{installed_base}/lib/python{py_version_short}',
'platstdlib': '{platbase}/lib/python{py_version_short}',
'purelib': '{base}/lib/python{py_version_short}/site-packages',
'platlib': '{platbase}/lib/python{py_version_short}/site-packages',
'include':
- '{base}/include/python{py_version_short}{abiflags}',
+ '{installed_base}/include/python{py_version_short}{abiflags}',
'platinclude':
- '{platbase}/include/python{py_version_short}{abiflags}',
+ '{installed_platbase}/include/python{py_version_short}{abiflags}',
'scripts': '{base}/bin',
'data': '{base}',
},
'posix_home': {
- 'stdlib': '{base}/lib/python',
+ 'stdlib': '{installed_base}/lib/python',
'platstdlib': '{base}/lib/python',
'purelib': '{base}/lib/python',
'platlib': '{base}/lib/python',
- 'include': '{base}/include/python',
- 'platinclude': '{base}/include/python',
+ 'include': '{installed_base}/include/python',
+ 'platinclude': '{installed_base}/include/python',
'scripts': '{base}/bin',
- 'data' : '{base}',
+ 'data': '{base}',
},
'nt': {
- 'stdlib': '{base}/Lib',
+ 'stdlib': '{installed_base}/Lib',
'platstdlib': '{base}/Lib',
'purelib': '{base}/Lib/site-packages',
'platlib': '{base}/Lib/site-packages',
- 'include': '{base}/Include',
- 'platinclude': '{base}/Include',
+ 'include': '{installed_base}/Include',
+ 'platinclude': '{installed_base}/Include',
'scripts': '{base}/Scripts',
- 'data' : '{base}',
+ 'data': '{base}',
},
'os2': {
- 'stdlib': '{base}/Lib',
+ 'stdlib': '{installed_base}/Lib',
'platstdlib': '{base}/Lib',
'purelib': '{base}/Lib/site-packages',
'platlib': '{base}/Lib/site-packages',
- 'include': '{base}/Include',
- 'platinclude': '{base}/Include',
+ 'include': '{installed_base}/Include',
+ 'platinclude': '{installed_base}/Include',
'scripts': '{base}/Scripts',
- 'data' : '{base}',
+ 'data': '{base}',
},
'os2_home': {
'stdlib': '{userbase}/lib/python{py_version_short}',
@@ -69,7 +69,7 @@ _INSTALL_SCHEMES = {
'platlib': '{userbase}/lib/python{py_version_short}/site-packages',
'include': '{userbase}/include/python{py_version_short}',
'scripts': '{userbase}/bin',
- 'data' : '{userbase}',
+ 'data': '{userbase}',
},
'nt_user': {
'stdlib': '{userbase}/Python{py_version_nodot}',
@@ -78,7 +78,7 @@ _INSTALL_SCHEMES = {
'platlib': '{userbase}/Python{py_version_nodot}/site-packages',
'include': '{userbase}/Python{py_version_nodot}/Include',
'scripts': '{userbase}/Scripts',
- 'data' : '{userbase}',
+ 'data': '{userbase}',
},
'posix_user': {
'stdlib': '{userbase}/lib/python{py_version_short}',
@@ -87,7 +87,7 @@ _INSTALL_SCHEMES = {
'platlib': '{userbase}/lib/python{py_version_short}/site-packages',
'include': '{userbase}/include/python{py_version_short}',
'scripts': '{userbase}/bin',
- 'data' : '{userbase}',
+ 'data': '{userbase}',
},
'osx_framework_user': {
'stdlib': '{userbase}/lib/python',
@@ -96,20 +96,26 @@ _INSTALL_SCHEMES = {
'platlib': '{userbase}/lib/python/site-packages',
'include': '{userbase}/include',
'scripts': '{userbase}/bin',
- 'data' : '{userbase}',
+ 'data': '{userbase}',
},
}
_SCHEME_KEYS = ('stdlib', 'platstdlib', 'purelib', 'platlib', 'include',
'scripts', 'data')
+
+ # FIXME don't rely on sys.version here, its format is an implementation detail
+ # of CPython, use sys.version_info or sys.hexversion
_PY_VERSION = sys.version.split()[0]
_PY_VERSION_SHORT = sys.version[:3]
_PY_VERSION_SHORT_NO_DOT = _PY_VERSION[0] + _PY_VERSION[2]
_PREFIX = os.path.normpath(sys.prefix)
+_BASE_PREFIX = os.path.normpath(sys.base_prefix)
_EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
+_BASE_EXEC_PREFIX = os.path.normpath(sys.base_exec_prefix)
_CONFIG_VARS = None
_USER_BASE = None
+
def _safe_realpath(path):
try:
return realpath(path)
@@ -132,19 +138,35 @@ if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower():
if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower():
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir))
-def is_python_build():
+# set for cross builds
+if "_PYTHON_PROJECT_BASE" in os.environ:
+ _PROJECT_BASE = _safe_realpath(os.environ["_PYTHON_PROJECT_BASE"])
+
+def _is_python_source_dir(d):
for fn in ("Setup.dist", "Setup.local"):
- if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)):
+ if os.path.isfile(os.path.join(d, "Modules", fn)):
return True
return False
-_PYTHON_BUILD = is_python_build()
+_sys_home = getattr(sys, '_home', None)
+if _sys_home and os.name == 'nt' and \
+ _sys_home.lower().endswith(('pcbuild', 'pcbuild\\amd64')):
+ _sys_home = os.path.dirname(_sys_home)
+ if _sys_home.endswith('pcbuild'): # must be amd64
+ _sys_home = os.path.dirname(_sys_home)
+def is_python_build(check_home=False):
+ if check_home and _sys_home:
+ return _is_python_source_dir(_sys_home)
+ return _is_python_source_dir(_PROJECT_BASE)
+
+_PYTHON_BUILD = is_python_build(True)
if _PYTHON_BUILD:
for scheme in ('posix_prefix', 'posix_home'):
_INSTALL_SCHEMES[scheme]['include'] = '{srcdir}/Include'
_INSTALL_SCHEMES[scheme]['platinclude'] = '{projectbase}/.'
+
def _subst_vars(s, local_vars):
try:
return s.format(**local_vars)
@@ -161,6 +183,7 @@ def _extend_dict(target_dict, other_dict):
continue
target_dict[key] = value
+
def _expand_vars(scheme, vars):
res = {}
if vars is None:
@@ -173,29 +196,41 @@ def _expand_vars(scheme, vars):
res[key] = os.path.normpath(_subst_vars(value, vars))
return res
+
def _get_default_scheme():
if os.name == 'posix':
# the default scheme for posix is posix_prefix
return 'posix_prefix'
return os.name
+
def _getuserbase():
env_base = os.environ.get("PYTHONUSERBASE", None)
+
def joinuser(*args):
return os.path.expanduser(os.path.join(*args))
# what about 'os2emx', 'riscos' ?
if os.name == "nt":
base = os.environ.get("APPDATA") or "~"
- return env_base if env_base else joinuser(base, "Python")
+ if env_base:
+ return env_base
+ else:
+ return joinuser(base, "Python")
if sys.platform == "darwin":
framework = get_config_var("PYTHONFRAMEWORK")
if framework:
- return env_base if env_base else joinuser("~", "Library", framework, "%d.%d"%(
- sys.version_info[:2]))
+ if env_base:
+ return env_base
+ else:
+ return joinuser("~", "Library", framework, "%d.%d" %
+ sys.version_info[:2])
- return env_base if env_base else joinuser("~", ".local")
+ if env_base:
+ return env_base
+ else:
+ return joinuser("~", ".local")
def _parse_makefile(filename, vars=None):
@@ -205,7 +240,6 @@ def _parse_makefile(filename, vars=None):
optional dictionary is passed in as the second argument, it is
used instead of a new dictionary.
"""
- import re
# Regexes needed for parsing Makefile (and similar syntaxes,
# like old-style Setup files).
_variable_rx = re.compile("([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)")
@@ -267,7 +301,8 @@ def _parse_makefile(filename, vars=None):
item = os.environ[n]
elif n in renamed_variables:
- if name.startswith('PY_') and name[3:] in renamed_variables:
+ if (name.startswith('PY_') and
+ name[3:] in renamed_variables):
item = ""
elif 'PY_' + n in notdone:
@@ -300,7 +335,6 @@ def _parse_makefile(filename, vars=None):
if name not in done:
done[name] = value
-
else:
# bogus variable reference (e.g. "prefix=$/opt/python");
# just drop it since we can't deal
@@ -320,14 +354,17 @@ def _parse_makefile(filename, vars=None):
def get_makefile_filename():
"""Return the path of the Makefile."""
if _PYTHON_BUILD:
- return os.path.join(_PROJECT_BASE, "Makefile")
- return os.path.join(get_path('stdlib'),
- 'config-{}{}'.format(_PY_VERSION_SHORT, sys.abiflags),
- 'Makefile')
-
+ return os.path.join(_sys_home or _PROJECT_BASE, "Makefile")
+ if hasattr(sys, 'abiflags'):
+ config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags)
+ else:
+ config_dir_name = 'config'
+ return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile')
-def _init_posix(vars):
- """Initialize the module as appropriate for POSIX systems."""
+def _generate_posix_vars():
+ """Generate the Python module containing build-time variables."""
+ import pprint
+ vars = {}
# load the installed Makefile:
makefile = get_makefile_filename()
try:
@@ -353,6 +390,46 @@ def _init_posix(vars):
if _PYTHON_BUILD:
vars['LDSHARED'] = vars['BLDSHARED']
+ # There's a chicken-and-egg situation on OS X with regards to the
+ # _sysconfigdata module after the changes introduced by #15298:
+ # get_config_vars() is called by get_platform() as part of the
+ # `make pybuilddir.txt` target -- which is a precursor to the
+ # _sysconfigdata.py module being constructed. Unfortunately,
+ # get_config_vars() eventually calls _init_posix(), which attempts
+ # to import _sysconfigdata, which we won't have built yet. In order
+ # for _init_posix() to work, if we're on Darwin, just mock up the
+ # _sysconfigdata module manually and populate it with the build vars.
+ # This is more than sufficient for ensuring the subsequent call to
+ # get_platform() succeeds.
+ name = '_sysconfigdata'
+ if 'darwin' in sys.platform:
+ import imp
+ module = imp.new_module(name)
+ module.build_time_vars = vars
+ sys.modules[name] = module
+
+ pybuilddir = 'build/lib.%s-%s' % (get_platform(), sys.version[:3])
+ if hasattr(sys, "gettotalrefcount"):
+ pybuilddir += '-pydebug'
+ os.makedirs(pybuilddir, exist_ok=True)
+ destfile = os.path.join(pybuilddir, name + '.py')
+
+ with open(destfile, 'w', encoding='utf8') as f:
+ f.write('# system configuration generated and used by'
+ ' the sysconfig module\n')
+ f.write('build_time_vars = ')
+ pprint.pprint(vars, stream=f)
+
+ # Create file used for sys.path fixup -- see Modules/getpath.c
+ with open('pybuilddir.txt', 'w', encoding='ascii') as f:
+ f.write(pybuilddir)
+
+def _init_posix(vars):
+ """Initialize the module as appropriate for POSIX systems."""
+ # _sysconfigdata is generated at build time, see _generate_posix_vars()
+ from _sysconfigdata import build_time_vars
+ vars.update(build_time_vars)
+
def _init_non_posix(vars):
"""Initialize the module as appropriate for NT"""
# set basic install directories
@@ -376,7 +453,6 @@ def parse_config_h(fp, vars=None):
optional dictionary is passed in as the second argument, it is
used instead of a new dictionary.
"""
- import re
if vars is None:
vars = {}
define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n")
@@ -389,8 +465,10 @@ def parse_config_h(fp, vars=None):
m = define_rx.match(line)
if m:
n, v = m.group(1, 2)
- try: v = int(v)
- except ValueError: pass
+ try:
+ v = int(v)
+ except ValueError:
+ pass
vars[n] = v
else:
m = undef_rx.match(line)
@@ -398,27 +476,29 @@ def parse_config_h(fp, vars=None):
vars[m.group(1)] = 0
return vars
+
def get_config_h_filename():
"""Return the path of pyconfig.h."""
if _PYTHON_BUILD:
if os.name == "nt":
- inc_dir = os.path.join(_PROJECT_BASE, "PC")
+ inc_dir = os.path.join(_sys_home or _PROJECT_BASE, "PC")
else:
- inc_dir = _PROJECT_BASE
+ inc_dir = _sys_home or _PROJECT_BASE
else:
inc_dir = get_path('platinclude')
return os.path.join(inc_dir, 'pyconfig.h')
+
def get_scheme_names():
"""Return a tuple containing the schemes names."""
- schemes = list(_INSTALL_SCHEMES.keys())
- schemes.sort()
- return tuple(schemes)
+ return tuple(sorted(_INSTALL_SCHEMES))
+
def get_path_names():
"""Return a tuple containing the paths names."""
return _SCHEME_KEYS
+
def get_paths(scheme=_get_default_scheme(), vars=None, expand=True):
"""Return a mapping containing an install scheme.
@@ -430,6 +510,7 @@ def get_paths(scheme=_get_default_scheme(), vars=None, expand=True):
else:
return _INSTALL_SCHEMES[scheme]
+
def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True):
"""Return a path corresponding to the scheme.
@@ -437,17 +518,17 @@ def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True):
"""
return get_paths(scheme, vars, expand)[name]
+
def get_config_vars(*args):
"""With no arguments, return a dictionary of all configuration
variables relevant for the current platform.
On Unix, this means every variable defined in Python's installed Makefile;
- On Windows and Mac OS it's a much smaller set.
+ On Windows it's a much smaller set.
With arguments, return a list of values that result from looking up
each argument in the configuration variable dictionary.
"""
- import re
global _CONFIG_VARS
if _CONFIG_VARS is None:
_CONFIG_VARS = {}
@@ -459,7 +540,9 @@ def get_config_vars(*args):
_CONFIG_VARS['py_version'] = _PY_VERSION
_CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT
_CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2]
+ _CONFIG_VARS['installed_base'] = _BASE_PREFIX
_CONFIG_VARS['base'] = _PREFIX
+ _CONFIG_VARS['installed_platbase'] = _BASE_EXEC_PREFIX
_CONFIG_VARS['platbase'] = _EXEC_PREFIX
_CONFIG_VARS['projectbase'] = _PROJECT_BASE
try:
@@ -477,88 +560,28 @@ def get_config_vars(*args):
# the init-function.
_CONFIG_VARS['userbase'] = _getuserbase()
- if 'srcdir' not in _CONFIG_VARS:
- _CONFIG_VARS['srcdir'] = _PROJECT_BASE
- else:
- _CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir'])
-
-
- # Convert srcdir into an absolute path if it appears necessary.
- # Normally it is relative to the build directory. However, during
- # testing, for example, we might be running a non-installed python
- # from a different directory.
- if _PYTHON_BUILD and os.name == "posix":
- base = _PROJECT_BASE
- try:
- cwd = os.getcwd()
- except OSError:
- cwd = None
- if (not os.path.isabs(_CONFIG_VARS['srcdir']) and
- base != cwd):
- # srcdir is relative and we are not in the same directory
- # as the executable. Assume executable is in the build
- # directory and make srcdir absolute.
- srcdir = os.path.join(base, _CONFIG_VARS['srcdir'])
- _CONFIG_VARS['srcdir'] = os.path.normpath(srcdir)
-
- if sys.platform == 'darwin':
- kernel_version = os.uname()[2] # Kernel version (8.4.3)
- major_version = int(kernel_version.split('.')[0])
-
- if major_version < 8:
- # On Mac OS X before 10.4, check if -arch and -isysroot
- # are in CFLAGS or LDFLAGS and remove them if they are.
- # This is needed when building extensions on a 10.3 system
- # using a universal build of python.
- for key in ('LDFLAGS', 'BASECFLAGS',
- # a number of derived variables. These need to be
- # patched up as well.
- 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
- flags = _CONFIG_VARS[key]
- flags = re.sub('-arch\s+\w+\s', ' ', flags)
- flags = re.sub('-isysroot [^ \t]*', ' ', flags)
- _CONFIG_VARS[key] = flags
+ # Always convert srcdir to an absolute path
+ srcdir = _CONFIG_VARS.get('srcdir', _PROJECT_BASE)
+ if os.name == 'posix':
+ if _PYTHON_BUILD:
+ # If srcdir is a relative path (typically '.' or '..')
+ # then it should be interpreted relative to the directory
+ # containing Makefile.
+ base = os.path.dirname(get_makefile_filename())
+ srcdir = os.path.join(base, srcdir)
else:
- # Allow the user to override the architecture flags using
- # an environment variable.
- # NOTE: This name was introduced by Apple in OSX 10.5 and
- # is used by several scripting languages distributed with
- # that OS release.
- if 'ARCHFLAGS' in os.environ:
- arch = os.environ['ARCHFLAGS']
- for key in ('LDFLAGS', 'BASECFLAGS',
- # a number of derived variables. These need to be
- # patched up as well.
- 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
-
- flags = _CONFIG_VARS[key]
- flags = re.sub('-arch\s+\w+\s', ' ', flags)
- flags = flags + ' ' + arch
- _CONFIG_VARS[key] = flags
-
- # If we're on OSX 10.5 or later and the user tries to
- # compiles an extension using an SDK that is not present
- # on the current machine it is better to not use an SDK
- # than to fail.
- #
- # The major usecase for this is users using a Python.org
- # binary installer on OSX 10.6: that installer uses
- # the 10.4u SDK, but that SDK is not installed by default
- # when you install Xcode.
- #
- CFLAGS = _CONFIG_VARS.get('CFLAGS', '')
- m = re.search('-isysroot\s+(\S+)', CFLAGS)
- if m is not None:
- sdk = m.group(1)
- if not os.path.exists(sdk):
- for key in ('LDFLAGS', 'BASECFLAGS',
- # a number of derived variables. These need to be
- # patched up as well.
- 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
-
- flags = _CONFIG_VARS[key]
- flags = re.sub('-isysroot\s+\S+(\s|$)', ' ', flags)
- _CONFIG_VARS[key] = flags
+ # srcdir is not meaningful since the installation is
+ # spread about the filesystem. We choose the
+ # directory containing the Makefile since we know it
+ # exists.
+ srcdir = os.path.dirname(get_makefile_filename())
+ _CONFIG_VARS['srcdir'] = _safe_realpath(srcdir)
+
+ # OS X platforms require special customization to handle
+ # multi-architecture, multi-os-version installers
+ if sys.platform == 'darwin':
+ import _osx_support
+ _osx_support.customize_config_vars(_CONFIG_VARS)
if args:
vals = []
@@ -568,6 +591,7 @@ def get_config_vars(*args):
else:
return _CONFIG_VARS
+
def get_config_var(name):
"""Return the value of a single variable using the dictionary returned by
'get_config_vars()'.
@@ -576,6 +600,7 @@ def get_config_var(name):
"""
return get_config_vars().get(name)
+
def get_platform():
"""Return a string that identifies the current platform.
@@ -601,7 +626,6 @@ def get_platform():
For other non-POSIX platforms, currently just returns 'sys.platform'.
"""
- import re
if os.name == 'nt':
# sniff sys.version for architecture.
prefix = " bit ("
@@ -617,10 +641,13 @@ def get_platform():
return sys.platform
if os.name != "posix" or not hasattr(os, 'uname'):
- # XXX what about the architecture? NT is Intel or Alpha,
- # Mac OS is M68k or PPC, etc.
+ # XXX what about the architecture? NT is Intel or Alpha
return sys.platform
+ # Set for cross builds explicitly
+ if "_PYTHON_HOST_PLATFORM" in os.environ:
+ return os.environ["_PYTHON_HOST_PLATFORM"]
+
# Try to distinguish various flavours of Unix
osname, host, release, version, machine = os.uname()
@@ -651,97 +678,15 @@ def get_platform():
return "%s-%s.%s" % (osname, version, release)
elif osname[:6] == "cygwin":
osname = "cygwin"
- rel_re = re.compile (r'[\d.]+')
+ rel_re = re.compile(r'[\d.]+')
m = rel_re.match(release)
if m:
release = m.group()
elif osname[:6] == "darwin":
- #
- # For our purposes, we'll assume that the system version from
- # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
- # to. This makes the compatibility story a bit more sane because the
- # machine is going to compile and link as if it were
- # MACOSX_DEPLOYMENT_TARGET.
- #
- cfgvars = get_config_vars()
- macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')
-
- if 1:
- # Always calculate the release of the running machine,
- # needed to determine if we can build fat binaries or not.
-
- macrelease = macver
- # Get the system version. Reading this plist is a documented
- # way to get the system version (see the documentation for
- # the Gestalt Manager)
- try:
- f = open('/System/Library/CoreServices/SystemVersion.plist')
- except IOError:
- # We're on a plain darwin box, fall back to the default
- # behaviour.
- pass
- else:
- try:
- m = re.search(
- r'<key>ProductUserVisibleVersion</key>\s*' +
- r'<string>(.*?)</string>', f.read())
- if m is not None:
- macrelease = '.'.join(m.group(1).split('.')[:2])
- # else: fall back to the default behaviour
- finally:
- f.close()
-
- if not macver:
- macver = macrelease
-
- if macver:
- release = macver
- osname = "macosx"
-
- if (macrelease + '.') >= '10.4.' and \
- '-arch' in get_config_vars().get('CFLAGS', '').strip():
- # The universal build will build fat binaries, but not on
- # systems before 10.4
- #
- # Try to detect 4-way universal builds, those have machine-type
- # 'universal' instead of 'fat'.
-
- machine = 'fat'
- cflags = get_config_vars().get('CFLAGS')
-
- archs = re.findall('-arch\s+(\S+)', cflags)
- archs = tuple(sorted(set(archs)))
-
- if len(archs) == 1:
- machine = archs[0]
- elif archs == ('i386', 'ppc'):
- machine = 'fat'
- elif archs == ('i386', 'x86_64'):
- machine = 'intel'
- elif archs == ('i386', 'ppc', 'x86_64'):
- machine = 'fat3'
- elif archs == ('ppc64', 'x86_64'):
- machine = 'fat64'
- elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
- machine = 'universal'
- else:
- raise ValueError(
- "Don't know machine value for archs=%r"%(archs,))
-
- elif machine == 'i386':
- # On OSX the machine type returned by uname is always the
- # 32-bit variant, even if the executable architecture is
- # the 64-bit variant
- if sys.maxsize >= 2**32:
- machine = 'x86_64'
-
- elif machine in ('PowerPC', 'Power_Macintosh'):
- # Pick a sane name for the PPC architecture.
- # See 'i386' case
- if sys.maxsize >= 2**32:
- machine = 'ppc64'
- else:
- machine = 'ppc'
+ import _osx_support
+ osname, release, machine = _osx_support.get_platform_osx(
+ get_config_vars(),
+ osname, release, machine)
return "%s-%s-%s" % (osname, release, machine)
@@ -749,21 +694,27 @@ def get_platform():
def get_python_version():
return _PY_VERSION_SHORT
+
def _print_dict(title, data):
for index, (key, value) in enumerate(sorted(data.items())):
if index == 0:
- print('{0}: '.format(title))
- print('\t{0} = "{1}"'.format(key, value))
+ print('%s: ' % (title))
+ print('\t%s = "%s"' % (key, value))
+
def _main():
"""Display all information sysconfig detains."""
- print('Platform: "{0}"'.format(get_platform()))
- print('Python version: "{0}"'.format(get_python_version()))
- print('Current installation scheme: "{0}"'.format(_get_default_scheme()))
- print('')
+ if '--generate-posix-vars' in sys.argv:
+ _generate_posix_vars()
+ return
+ print('Platform: "%s"' % get_platform())
+ print('Python version: "%s"' % get_python_version())
+ print('Current installation scheme: "%s"' % _get_default_scheme())
+ print()
_print_dict('Paths', get_paths())
- print('')
+ print()
_print_dict('Variables', get_config_vars())
+
if __name__ == '__main__':
_main()
diff --git a/Lib/tabnanny.py b/Lib/tabnanny.py
index 46f8163..4a54f89 100755
--- a/Lib/tabnanny.py
+++ b/Lib/tabnanny.py
@@ -126,6 +126,9 @@ def check(file):
else: print(file, badline, repr(line))
return
+ finally:
+ f.close()
+
if verbose:
print("%r: Clean bill of health." % (file,))
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index f26953e..7b9f407 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -29,8 +29,6 @@
"""Read from and write to tar format archives.
"""
-__version__ = "$Revision$"
-
version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
@@ -42,9 +40,9 @@ __credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
#---------
import sys
import os
+import io
import shutil
import stat
-import errno
import time
import struct
import copy
@@ -247,8 +245,8 @@ def calc_chksums(buf):
the high bit set. So we calculate two checksums, unsigned and
signed.
"""
- unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
- signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
+ unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
+ signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
return unsigned_chksum, signed_chksum
def copyfileobj(src, dst, length=None):
@@ -276,47 +274,13 @@ def copyfileobj(src, dst, length=None):
dst.write(buf)
return
-filemode_table = (
- ((S_IFLNK, "l"),
- (S_IFREG, "-"),
- (S_IFBLK, "b"),
- (S_IFDIR, "d"),
- (S_IFCHR, "c"),
- (S_IFIFO, "p")),
-
- ((TUREAD, "r"),),
- ((TUWRITE, "w"),),
- ((TUEXEC|TSUID, "s"),
- (TSUID, "S"),
- (TUEXEC, "x")),
-
- ((TGREAD, "r"),),
- ((TGWRITE, "w"),),
- ((TGEXEC|TSGID, "s"),
- (TSGID, "S"),
- (TGEXEC, "x")),
-
- ((TOREAD, "r"),),
- ((TOWRITE, "w"),),
- ((TOEXEC|TSVTX, "t"),
- (TSVTX, "T"),
- (TOEXEC, "x"))
-)
-
def filemode(mode):
- """Convert a file's mode to a string of the form
- -rwxrwxrwx.
- Used by TarFile.list()
- """
- perm = []
- for table in filemode_table:
- for bit, char in table:
- if mode & bit == bit:
- perm.append(char)
- break
- else:
- perm.append("-")
- return "".join(perm)
+ """Deprecated in this location; use stat.filemode."""
+ import warnings
+ warnings.warn("deprecated in favor of stat.filemode",
+ DeprecationWarning, 2)
+ return stat.filemode(mode)
+
class TarError(Exception):
"""Base exception."""
@@ -423,10 +387,11 @@ class _Stream:
self.crc = zlib.crc32(b"")
if mode == "r":
self._init_read_gz()
+ self.exception = zlib.error
else:
self._init_write_gz()
- if comptype == "bz2":
+ elif comptype == "bz2":
try:
import bz2
except ImportError:
@@ -434,8 +399,25 @@ class _Stream:
if mode == "r":
self.dbuf = b""
self.cmp = bz2.BZ2Decompressor()
+ self.exception = IOError
else:
self.cmp = bz2.BZ2Compressor()
+
+ elif comptype == "xz":
+ try:
+ import lzma
+ except ImportError:
+ raise CompressionError("lzma module is not available")
+ if mode == "r":
+ self.dbuf = b""
+ self.cmp = lzma.LZMADecompressor()
+ self.exception = lzma.LZMAError
+ else:
+ self.cmp = lzma.LZMACompressor()
+
+ elif comptype != "tar":
+ raise CompressionError("unknown compression type %r" % comptype)
+
except:
if not self._extfileobj:
self.fileobj.close()
@@ -587,7 +569,7 @@ class _Stream:
break
try:
buf = self.cmp.decompress(buf)
- except IOError:
+ except self.exception:
raise ReadError("invalid compressed data")
self.dbuf += buf
c += len(buf)
@@ -625,76 +607,19 @@ class _StreamProxy(object):
return self.buf
def getcomptype(self):
- if self.buf.startswith(b"\037\213\010"):
+ if self.buf.startswith(b"\x1f\x8b\x08"):
return "gz"
- if self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
+ elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
return "bz2"
- return "tar"
+ elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
+ return "xz"
+ else:
+ return "tar"
def close(self):
self.fileobj.close()
# class StreamProxy
-class _BZ2Proxy(object):
- """Small proxy class that enables external file object
- support for "r:bz2" and "w:bz2" modes. This is actually
- a workaround for a limitation in bz2 module's BZ2File
- class which (unlike gzip.GzipFile) has no support for
- a file object argument.
- """
-
- blocksize = 16 * 1024
-
- def __init__(self, fileobj, mode):
- self.fileobj = fileobj
- self.mode = mode
- self.name = getattr(self.fileobj, "name", None)
- self.init()
-
- def init(self):
- import bz2
- self.pos = 0
- if self.mode == "r":
- self.bz2obj = bz2.BZ2Decompressor()
- self.fileobj.seek(0)
- self.buf = b""
- else:
- self.bz2obj = bz2.BZ2Compressor()
-
- def read(self, size):
- x = len(self.buf)
- while x < size:
- raw = self.fileobj.read(self.blocksize)
- if not raw:
- break
- data = self.bz2obj.decompress(raw)
- self.buf += data
- x += len(data)
-
- buf = self.buf[:size]
- self.buf = self.buf[size:]
- self.pos += len(buf)
- return buf
-
- def seek(self, pos):
- if pos < self.pos:
- self.init()
- self.read(pos - self.pos)
-
- def tell(self):
- return self.pos
-
- def write(self, data):
- self.pos += len(data)
- raw = self.bz2obj.compress(data)
- self.fileobj.write(raw)
-
- def close(self):
- if self.mode == "w":
- raw = self.bz2obj.flush()
- self.fileobj.write(raw)
-# class _BZ2Proxy
-
#------------------------
# Extraction file object
#------------------------
@@ -709,6 +634,8 @@ class _FileInFile(object):
self.offset = offset
self.size = size
self.position = 0
+ self.name = getattr(fileobj, "name", None)
+ self.closed = False
if blockinfo is None:
blockinfo = [(0, size)]
@@ -727,10 +654,16 @@ class _FileInFile(object):
if lastpos < self.size:
self.map.append((False, lastpos, self.size, None))
+ def flush(self):
+ pass
+
+ def readable(self):
+ return True
+
+ def writable(self):
+ return False
+
def seekable(self):
- if not hasattr(self.fileobj, "seekable"):
- # XXX gzip.GzipFile and bz2.BZ2File
- return True
return self.fileobj.seekable()
def tell(self):
@@ -738,10 +671,21 @@ class _FileInFile(object):
"""
return self.position
- def seek(self, position):
+ def seek(self, position, whence=io.SEEK_SET):
"""Seek to a position in the file.
"""
- self.position = position
+ if whence == io.SEEK_SET:
+ self.position = min(max(position, 0), self.size)
+ elif whence == io.SEEK_CUR:
+ if position < 0:
+ self.position = max(self.position + position, 0)
+ else:
+ self.position = min(self.position + position, self.size)
+ elif whence == io.SEEK_END:
+ self.position = max(min(self.size + position, self.size), 0)
+ else:
+ raise ValueError("Invalid argument")
+ return self.position
def read(self, size=None):
"""Read data from the file.
@@ -770,145 +714,22 @@ class _FileInFile(object):
size -= length
self.position += length
return buf
-#class _FileInFile
-
-
-class ExFileObject(object):
- """File-like object for reading an archive member.
- Is returned by TarFile.extractfile().
- """
- blocksize = 1024
-
- def __init__(self, tarfile, tarinfo):
- self.fileobj = _FileInFile(tarfile.fileobj,
- tarinfo.offset_data,
- tarinfo.size,
- tarinfo.sparse)
- self.name = tarinfo.name
- self.mode = "r"
- self.closed = False
- self.size = tarinfo.size
-
- self.position = 0
- self.buffer = b""
-
- def readable(self):
- return True
-
- def writable(self):
- return False
-
- def seekable(self):
- return self.fileobj.seekable()
-
- def read(self, size=None):
- """Read at most size bytes from the file. If size is not
- present or None, read all data until EOF is reached.
- """
- if self.closed:
- raise ValueError("I/O operation on closed file")
-
- buf = b""
- if self.buffer:
- if size is None:
- buf = self.buffer
- self.buffer = b""
- else:
- buf = self.buffer[:size]
- self.buffer = self.buffer[size:]
-
- if size is None:
- buf += self.fileobj.read()
- else:
- buf += self.fileobj.read(size - len(buf))
-
- self.position += len(buf)
- return buf
-
- # XXX TextIOWrapper uses the read1() method.
- read1 = read
-
- def readline(self, size=-1):
- """Read one entire line from the file. If size is present
- and non-negative, return a string with at most that
- size, which may be an incomplete line.
- """
- if self.closed:
- raise ValueError("I/O operation on closed file")
-
- pos = self.buffer.find(b"\n") + 1
- if pos == 0:
- # no newline found.
- while True:
- buf = self.fileobj.read(self.blocksize)
- self.buffer += buf
- if not buf or b"\n" in buf:
- pos = self.buffer.find(b"\n") + 1
- if pos == 0:
- # no newline found.
- pos = len(self.buffer)
- break
-
- if size != -1:
- pos = min(size, pos)
-
- buf = self.buffer[:pos]
- self.buffer = self.buffer[pos:]
- self.position += len(buf)
- return buf
-
- def readlines(self):
- """Return a list with all remaining lines.
- """
- result = []
- while True:
- line = self.readline()
- if not line: break
- result.append(line)
- return result
-
- def tell(self):
- """Return the current file position.
- """
- if self.closed:
- raise ValueError("I/O operation on closed file")
-
- return self.position
-
- def seek(self, pos, whence=os.SEEK_SET):
- """Seek to a position in the file.
- """
- if self.closed:
- raise ValueError("I/O operation on closed file")
- if whence == os.SEEK_SET:
- self.position = min(max(pos, 0), self.size)
- elif whence == os.SEEK_CUR:
- if pos < 0:
- self.position = max(self.position + pos, 0)
- else:
- self.position = min(self.position + pos, self.size)
- elif whence == os.SEEK_END:
- self.position = max(min(self.size + pos, self.size), 0)
- else:
- raise ValueError("Invalid argument")
-
- self.buffer = b""
- self.fileobj.seek(self.position)
+ def readinto(self, b):
+ buf = self.read(len(b))
+ b[:len(buf)] = buf
+ return len(buf)
def close(self):
- """Close the file object.
- """
self.closed = True
+#class _FileInFile
- def __iter__(self):
- """Get an iterator over the file's lines.
- """
- while True:
- line = self.readline()
- if not line:
- break
- yield line
+class ExFileObject(io.BufferedReader):
+
+ def __init__(self, tarfile, tarinfo):
+ fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
+ tarinfo.size, tarinfo.sparse)
+ super().__init__(fileobj)
#class ExFileObject
#------------------
@@ -1087,7 +908,7 @@ class TarInfo(object):
def create_pax_global_header(cls, pax_headers):
"""Return the object as a pax global header block sequence.
"""
- return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8")
+ return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
def _posix_split_name(self, name):
"""Split a name longer than 100 chars into a prefix
@@ -1170,7 +991,7 @@ class TarInfo(object):
binary = False
for keyword, value in pax_headers.items():
try:
- value.encode("utf8", "strict")
+ value.encode("utf-8", "strict")
except UnicodeEncodeError:
binary = True
break
@@ -1181,13 +1002,13 @@ class TarInfo(object):
records += b"21 hdrcharset=BINARY\n"
for keyword, value in pax_headers.items():
- keyword = keyword.encode("utf8")
+ keyword = keyword.encode("utf-8")
if binary:
# Try to restore the original byte representation of `value'.
# Needless to say, that the encoding must match the string.
value = value.encode(encoding, "surrogateescape")
else:
- value = value.encode("utf8")
+ value = value.encode("utf-8")
l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
n = p = 0
@@ -1396,7 +1217,7 @@ class TarInfo(object):
# the translation to UTF-8 fails.
match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
if match is not None:
- pax_headers["hdrcharset"] = match.group(1).decode("utf8")
+ pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
# For the time being, we don't care about anything other than "BINARY".
# The only other value that is currently allowed by the standard is
@@ -1405,7 +1226,7 @@ class TarInfo(object):
if hdrcharset == "BINARY":
encoding = tarfile.encoding
else:
- encoding = "utf8"
+ encoding = "utf-8"
# Parse pax header information. A record looks like that:
# "%d %s=%s\n" % (length, keyword, value). length is the size
@@ -1422,20 +1243,20 @@ class TarInfo(object):
length = int(length)
value = buf[match.end(2) + 1:match.start(1) + length - 1]
- # Normally, we could just use "utf8" as the encoding and "strict"
+ # Normally, we could just use "utf-8" as the encoding and "strict"
# as the error handler, but we better not take the risk. For
# example, GNU tar <= 1.23 is known to store filenames it cannot
# translate to UTF-8 as raw strings (unfortunately without a
# hdrcharset=BINARY header).
# We first try the strict standard encoding, and if that fails we
# fall back on the user's encoding and error handler.
- keyword = self._decode_pax_field(keyword, "utf8", "utf8",
+ keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
tarfile.errors)
if keyword in PAX_NAME_FIELDS:
value = self._decode_pax_field(value, encoding, tarfile.encoding,
tarfile.errors)
else:
- value = self._decode_pax_field(value, "utf8", "utf8",
+ value = self._decode_pax_field(value, "utf-8", "utf-8",
tarfile.errors)
pax_headers[keyword] = value
@@ -1595,7 +1416,7 @@ class TarFile(object):
tarinfo = TarInfo # The default TarInfo class to use.
- fileobject = ExFileObject # The default ExFileObject class to use.
+ fileobject = ExFileObject # The file-object for extractfile().
def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
@@ -1714,18 +1535,22 @@ class TarFile(object):
'r:' open for reading exclusively uncompressed
'r:gz' open for reading with gzip compression
'r:bz2' open for reading with bzip2 compression
+ 'r:xz' open for reading with lzma compression
'a' or 'a:' open for appending, creating the file if necessary
'w' or 'w:' open for writing without compression
'w:gz' open for writing with gzip compression
'w:bz2' open for writing with bzip2 compression
+ 'w:xz' open for writing with lzma compression
'r|*' open a stream of tar blocks with transparent compression
'r|' open an uncompressed stream of tar blocks for reading
'r|gz' open a gzip compressed stream of tar blocks
'r|bz2' open a bzip2 compressed stream of tar blocks
+ 'r|xz' open an lzma compressed stream of tar blocks
'w|' open an uncompressed stream for writing
'w|gz' open a gzip compressed stream for writing
'w|bz2' open a bzip2 compressed stream for writing
+ 'w|xz' open an lzma compressed stream for writing
"""
if not name and not fileobj:
@@ -1832,10 +1657,8 @@ class TarFile(object):
except ImportError:
raise CompressionError("bz2 module is not available")
- if fileobj is not None:
- fileobj = _BZ2Proxy(fileobj, mode)
- else:
- fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
+ fileobj = bz2.BZ2File(fileobj or name, mode,
+ compresslevel=compresslevel)
try:
t = cls.taropen(name, mode, fileobj, **kwargs)
@@ -1845,11 +1668,35 @@ class TarFile(object):
t._extfileobj = False
return t
+ @classmethod
+ def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
+ """Open lzma compressed tar archive name for reading or writing.
+ Appending is not allowed.
+ """
+ if mode not in ("r", "w"):
+ raise ValueError("mode must be 'r' or 'w'")
+
+ try:
+ import lzma
+ except ImportError:
+ raise CompressionError("lzma module is not available")
+
+ fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)
+
+ try:
+ t = cls.taropen(name, mode, fileobj, **kwargs)
+ except (lzma.LZMAError, EOFError):
+ fileobj.close()
+ raise ReadError("not an lzma file")
+ t._extfileobj = False
+ return t
+
# All *open() methods are registered here.
OPEN_METH = {
"tar": "taropen", # uncompressed tar
"gz": "gzopen", # gzip compressed tar
- "bz2": "bz2open" # bzip2 compressed tar
+ "bz2": "bz2open", # bzip2 compressed tar
+ "xz": "xzopen" # lzma compressed tar
}
#--------------------------------------------------------------------------
@@ -2009,7 +1856,7 @@ class TarFile(object):
for tarinfo in self:
if verbose:
- print(filemode(tarinfo.mode), end=' ')
+ print(stat.filemode(tarinfo.mode), end=' ')
print("%s/%s" % (tarinfo.uname or tarinfo.uid,
tarinfo.gname or tarinfo.gid), end=' ')
if tarinfo.ischr() or tarinfo.isblk():
@@ -2192,12 +2039,9 @@ class TarFile(object):
def extractfile(self, member):
"""Extract a member from the archive as a file object. `member' may be
- a filename or a TarInfo object. If `member' is a regular file, a
- file-like object is returned. If `member' is a link, a file-like
- object is constructed from the link's target. If `member' is none of
- the above, None is returned.
- The file-like object is read-only and provides the following
- methods: read(), readline(), readlines(), seek() and tell()
+ a filename or a TarInfo object. If `member' is a regular file or a
+ link, an io.BufferedReader object is returned. Otherwise, None is
+ returned.
"""
self._check("r")
@@ -2206,12 +2050,8 @@ class TarFile(object):
else:
tarinfo = member
- if tarinfo.isreg():
- return self.fileobject(self, tarinfo)
-
- elif tarinfo.type not in SUPPORTED_TYPES:
- # If a member's type is unknown, it is treated as a
- # regular file.
+ if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
+ # Members with unknown types are treated as regular files.
return self.fileobject(self, tarinfo)
elif tarinfo.islnk() or tarinfo.issym():
@@ -2283,9 +2123,8 @@ class TarFile(object):
# Use a safe mode for the directory, the real mode is set
# later in _extract_member().
os.mkdir(targetpath, 0o700)
- except EnvironmentError as e:
- if e.errno != errno.EEXIST:
- raise
+ except FileExistsError:
+ pass
def makefile(self, tarinfo, targetpath):
"""Make a file called targetpath.
diff --git a/Lib/tempfile.py b/Lib/tempfile.py
index 34dff30..39ebf5a 100644
--- a/Lib/tempfile.py
+++ b/Lib/tempfile.py
@@ -10,8 +10,6 @@ This module also provides some data items to the user:
TMP_MAX - maximum number of names that will be tried before
giving up.
- template - the default prefix for all temporary names.
- You may change this to control the default prefix.
tempdir - If this is set to a string before the first use of
any routine from this module, it will be considered as
another candidate location to store temporary files.
@@ -33,7 +31,6 @@ import warnings as _warnings
import sys as _sys
import io as _io
import os as _os
-import errno as _errno
from random import Random as _Random
try:
@@ -45,7 +42,7 @@ else:
def _set_cloexec(fd):
try:
flags = _fcntl.fcntl(fd, _fcntl.F_GETFD, 0)
- except IOError:
+ except OSError:
pass
else:
# flags read successfully, modify
@@ -74,6 +71,8 @@ if hasattr(_os, 'TMP_MAX'):
else:
TMP_MAX = 10000
+# Although it does not have an underscore for historical reasons, this
+# variable is an internal implementation detail (see issue 10354).
template = "tmp"
# Internal routines.
@@ -85,19 +84,16 @@ if hasattr(_os, "lstat"):
elif hasattr(_os, "stat"):
_stat = _os.stat
else:
- # Fallback. All we need is something that raises os.error if the
+ # Fallback. All we need is something that raises OSError if the
# file doesn't exist.
def _stat(fn):
- try:
- f = open(fn)
- except IOError:
- raise _os.error
+ f = open(fn)
f.close()
def _exists(fn):
try:
_stat(fn)
- except _os.error:
+ except OSError:
return False
else:
return True
@@ -149,7 +145,7 @@ def _candidate_tempdir_list():
# As a last resort, the current directory.
try:
dirlist.append(_os.getcwd())
- except (AttributeError, _os.error):
+ except (AttributeError, OSError):
dirlist.append(_os.curdir)
return dirlist
@@ -181,12 +177,11 @@ def _get_default_tempdir():
_os.unlink(filename)
del fp, fd
return dir
- except (OSError, IOError) as e:
- if e.args[0] != _errno.EEXIST:
- break # no point trying more names in this directory
+ except FileExistsError:
pass
- raise IOError(_errno.ENOENT,
- "No usable temporary directory found in %s" % dirlist)
+ except OSError:
+ break # no point trying more names in this directory
+ raise FileNotFoundError("No usable temporary directory found in %s" % dirlist)
_name_sequence = None
@@ -216,12 +211,10 @@ def _mkstemp_inner(dir, pre, suf, flags):
fd = _os.open(file, flags, 0o600)
_set_cloexec(fd)
return (fd, _os.path.abspath(file))
- except OSError as e:
- if e.errno == _errno.EEXIST:
- continue # try again
- raise
+ except FileExistsError:
+ continue # try again
- raise IOError(_errno.EEXIST, "No usable temporary file name found")
+ raise FileExistsError("No usable temporary file name found")
# User visible interfaces.
@@ -305,12 +298,10 @@ def mkdtemp(suffix="", prefix=template, dir=None):
try:
_os.mkdir(file, 0o700)
return file
- except OSError as e:
- if e.errno == _errno.EEXIST:
- continue # try again
- raise
+ except FileExistsError:
+ continue # try again
- raise IOError(_errno.EEXIST, "No usable temporary directory name found")
+ raise FileExistsError("No usable temporary directory name found")
def mktemp(suffix="", prefix=template, dir=None):
"""User-callable function to return a unique temporary file name. The
@@ -339,7 +330,7 @@ def mktemp(suffix="", prefix=template, dir=None):
if not _exists(file):
return file
- raise IOError(_errno.EEXIST, "No usable temporary filename found")
+ raise FileExistsError("No usable temporary filename found")
class _TemporaryFileWrapper:
@@ -592,8 +583,13 @@ class SpooledTemporaryFile:
def tell(self):
return self._file.tell()
- def truncate(self):
- self._file.truncate()
+ def truncate(self, size=None):
+ if size is None:
+ self._file.truncate()
+ else:
+ if size > self._max_size:
+ self.rollover()
+ self._file.truncate(size)
def write(self, s):
file = self._file
@@ -669,7 +665,7 @@ class TemporaryDirectory(object):
_islink = staticmethod(_os.path.islink)
_remove = staticmethod(_os.remove)
_rmdir = staticmethod(_os.rmdir)
- _os_error = _os.error
+ _os_error = OSError
_warn = _warnings.warn
def _rmtree(self, path):
diff --git a/Lib/test/buffer_tests.py b/Lib/test/buffer_tests.py
index 6d20f7d..cf54c28 100644
--- a/Lib/test/buffer_tests.py
+++ b/Lib/test/buffer_tests.py
@@ -200,7 +200,13 @@ class MixinBytesBufferCommonTests(object):
self.marshal(b'abc\ndef\r\nghi\n\r').splitlines())
self.assertEqual([b'', b'abc', b'def', b'ghi', b''],
self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines())
+ self.assertEqual([b'', b'abc', b'def', b'ghi', b''],
+ self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines(False))
+ self.assertEqual([b'\n', b'abc\n', b'def\r\n', b'ghi\n', b'\r'],
+ self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines(True))
+ self.assertEqual([b'', b'abc', b'def', b'ghi', b''],
+ self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines(keepends=False))
self.assertEqual([b'\n', b'abc\n', b'def\r\n', b'ghi\n', b'\r'],
- self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines(1))
+ self.marshal(b'\nabc\ndef\r\nghi\n\r').splitlines(keepends=True))
self.assertRaises(TypeError, self.marshal(b'abc').splitlines, 42, 42)
diff --git a/Lib/test/crashers/README b/Lib/test/crashers/README
index 2a73e1b..0259a06 100644
--- a/Lib/test/crashers/README
+++ b/Lib/test/crashers/README
@@ -14,3 +14,7 @@ note if the cause is system or environment dependent and what the variables are.
Once the crash is fixed, the test case should be moved into an appropriate test
(even if it was originally from the test suite). This ensures the regression
doesn't happen again. And if it does, it should be easier to track down.
+
+Also see Lib/test_crashers.py which exercises the crashers in this directory.
+In particular, make sure to add any new infinite loop crashers to the black
+list so it doesn't try to run them.
diff --git a/Lib/test/crashers/borrowed_ref_1.py b/Lib/test/crashers/borrowed_ref_1.py
deleted file mode 100644
index b82f464..0000000
--- a/Lib/test/crashers/borrowed_ref_1.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""
-_PyType_Lookup() returns a borrowed reference.
-This attacks the call in dictobject.c.
-"""
-
-class A(object):
- pass
-
-class B(object):
- def __del__(self):
- print('hi')
- del D.__missing__
-
-class D(dict):
- class __missing__:
- def __init__(self, *args):
- pass
-
-
-d = D()
-a = A()
-a.cycle = a
-a.other = B()
-del a
-
-prev = None
-while 1:
- d[5]
- prev = (prev,)
diff --git a/Lib/test/crashers/borrowed_ref_2.py b/Lib/test/crashers/borrowed_ref_2.py
deleted file mode 100644
index 6e403eb..0000000
--- a/Lib/test/crashers/borrowed_ref_2.py
+++ /dev/null
@@ -1,38 +0,0 @@
-"""
-_PyType_Lookup() returns a borrowed reference.
-This attacks PyObject_GenericSetAttr().
-
-NB. on my machine this crashes in 2.5 debug but not release.
-"""
-
-class A(object):
- pass
-
-class B(object):
- def __del__(self):
- print("hi")
- del C.d
-
-class D(object):
- def __set__(self, obj, value):
- self.hello = 42
-
-class C(object):
- d = D()
-
- def g():
- pass
-
-
-c = C()
-a = A()
-a.cycle = a
-a.other = B()
-
-lst = [None] * 1000000
-i = 0
-del a
-while 1:
- c.d = 42 # segfaults in PyMethod_New(__func__=D.__set__, __self__=d)
- lst[i] = c.g # consume the free list of instancemethod objects
- i += 1
diff --git a/Lib/test/crashers/compiler_recursion.py b/Lib/test/crashers/compiler_recursion.py
index 4954bdd..31f28a9 100644
--- a/Lib/test/crashers/compiler_recursion.py
+++ b/Lib/test/crashers/compiler_recursion.py
@@ -1,5 +1,13 @@
"""
-The compiler (>= 2.5) recurses happily.
+The compiler (>= 2.5) recurses happily until it blows the stack.
+
+Recorded on the tracker as http://bugs.python.org/issue11383
"""
-compile('()'*9**5, '?', 'exec')
+# The variant below blows up in compiler_call, but there are assorted
+# other variations that blow up in other functions
+# e.g. '1*'*10**5+'1' will die in compiler_visit_expr
+
+# The exact limit to destroy the stack will vary by platform
+# but 10M should do the trick even with huge stack allocations
+compile('()'*10**7, '?', 'exec')
diff --git a/Lib/test/crashers/loosing_mro_ref.py b/Lib/test/crashers/loosing_mro_ref.py
deleted file mode 100644
index b3bcd32..0000000
--- a/Lib/test/crashers/loosing_mro_ref.py
+++ /dev/null
@@ -1,35 +0,0 @@
-"""
-There is a way to put keys of any type in a type's dictionary.
-I think this allows various kinds of crashes, but so far I have only
-found a convoluted attack of _PyType_Lookup(), which uses the mro of the
-type without holding a strong reference to it. Probably works with
-super.__getattribute__() too, which uses the same kind of code.
-"""
-
-class MyKey(object):
- def __hash__(self):
- return hash('mykey')
-
- def __eq__(self, other):
- # the following line decrefs the previous X.__mro__
- X.__bases__ = (Base2,)
- # trash all tuples of length 3, to make sure that the items of
- # the previous X.__mro__ are really garbage
- z = []
- for i in range(1000):
- z.append((i, None, None))
- return 0
-
-
-class Base(object):
- mykey = 'from Base'
-
-class Base2(object):
- mykey = 'from Base2'
-
-# you can't add a non-string key to X.__dict__, but it can be
-# there from the beginning :-)
-X = type('X', (Base,), {MyKey(): 5})
-
-print(X.mykey)
-# I get a segfault, or a slightly wrong assertion error in a debug build.
diff --git a/Lib/test/crashers/nasty_eq_vs_dict.py b/Lib/test/crashers/nasty_eq_vs_dict.py
deleted file mode 100644
index 85f7caf..0000000
--- a/Lib/test/crashers/nasty_eq_vs_dict.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# from http://mail.python.org/pipermail/python-dev/2001-June/015239.html
-
-# if you keep changing a dictionary while looking up a key, you can
-# provoke an infinite recursion in C
-
-# At the time neither Tim nor Michael could be bothered to think of a
-# way to fix it.
-
-class Yuck:
- def __init__(self):
- self.i = 0
-
- def make_dangerous(self):
- self.i = 1
-
- def __hash__(self):
- # direct to slot 4 in table of size 8; slot 12 when size 16
- return 4 + 8
-
- def __eq__(self, other):
- if self.i == 0:
- # leave dict alone
- pass
- elif self.i == 1:
- # fiddle to 16 slots
- self.__fill_dict(6)
- self.i = 2
- else:
- # fiddle to 8 slots
- self.__fill_dict(4)
- self.i = 1
-
- return 1
-
- def __fill_dict(self, n):
- self.i = 0
- dict.clear()
- for i in range(n):
- dict[i] = i
- dict[self] = "OK!"
-
-y = Yuck()
-dict = {y: "OK!"}
-
-z = Yuck()
-y.make_dangerous()
-print(dict[z])
diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py
index bb18630..931ef6f 100644
--- a/Lib/test/datetimetester.py
+++ b/Lib/test/datetimetester.py
@@ -979,7 +979,7 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase):
# exempt such platforms (provided they return reasonable
# results!).
for insane in -1e200, 1e200:
- self.assertRaises(ValueError, self.theclass.fromtimestamp,
+ self.assertRaises(OverflowError, self.theclass.fromtimestamp,
insane)
def test_today(self):
@@ -1291,12 +1291,18 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase):
self.assertTrue(self.theclass.min)
self.assertTrue(self.theclass.max)
- def test_strftime_out_of_range(self):
- # For nasty technical reasons, we can't handle years before 1000.
- cls = self.theclass
- self.assertEqual(cls(1000, 1, 1).strftime("%Y"), "1000")
- for y in 1, 49, 51, 99, 100, 999:
- self.assertRaises(ValueError, cls(y, 1, 1).strftime, "%Y")
+ def test_strftime_y2k(self):
+ for y in (1, 49, 70, 99, 100, 999, 1000, 1970):
+ d = self.theclass(y, 1, 1)
+ # Issue 13305: For years < 1000, the value is not always
+ # padded to 4 digits across platforms. The C standard
+ # assumes year >= 1900, so it does not specify the number
+ # of digits.
+ if d.strftime("%Y") != '%04d' % y:
+ # Year 42 returns '42', not padded
+ self.assertEqual(d.strftime("%Y"), '%d' % y)
+ # '0042' is obtained anyway
+ self.assertEqual(d.strftime("%4Y"), '%04d' % y)
def test_replace(self):
cls = self.theclass
@@ -1731,13 +1737,74 @@ class TestDateTime(TestDate):
got = self.theclass.utcfromtimestamp(ts)
self.verify_field_equality(expected, got)
+ # Run with US-style DST rules: DST begins 2 a.m. on second Sunday in
+ # March (M3.2.0) and ends 2 a.m. on first Sunday in November (M11.1.0).
+ @support.run_with_tz('EST+05EDT,M3.2.0,M11.1.0')
+ def test_timestamp_naive(self):
+ t = self.theclass(1970, 1, 1)
+ self.assertEqual(t.timestamp(), 18000.0)
+ t = self.theclass(1970, 1, 1, 1, 2, 3, 4)
+ self.assertEqual(t.timestamp(),
+ 18000.0 + 3600 + 2*60 + 3 + 4*1e-6)
+ # Missing hour may produce platform-dependent result
+ t = self.theclass(2012, 3, 11, 2, 30)
+ self.assertIn(self.theclass.fromtimestamp(t.timestamp()),
+ [t - timedelta(hours=1), t + timedelta(hours=1)])
+ # Ambiguous hour defaults to DST
+ t = self.theclass(2012, 11, 4, 1, 30)
+ self.assertEqual(self.theclass.fromtimestamp(t.timestamp()), t)
+
+ # Timestamp may raise an overflow error on some platforms
+ for t in [self.theclass(1,1,1), self.theclass(9999,12,12)]:
+ try:
+ s = t.timestamp()
+ except OverflowError:
+ pass
+ else:
+ self.assertEqual(self.theclass.fromtimestamp(s), t)
+
+ def test_timestamp_aware(self):
+ t = self.theclass(1970, 1, 1, tzinfo=timezone.utc)
+ self.assertEqual(t.timestamp(), 0.0)
+ t = self.theclass(1970, 1, 1, 1, 2, 3, 4, tzinfo=timezone.utc)
+ self.assertEqual(t.timestamp(),
+ 3600 + 2*60 + 3 + 4*1e-6)
+ t = self.theclass(1970, 1, 1, 1, 2, 3, 4,
+ tzinfo=timezone(timedelta(hours=-5), 'EST'))
+ self.assertEqual(t.timestamp(),
+ 18000 + 3600 + 2*60 + 3 + 4*1e-6)
def test_microsecond_rounding(self):
- # Test whether fromtimestamp "rounds up" floats that are less
- # than 1/2 microsecond smaller than an integer.
for fts in [self.theclass.fromtimestamp,
self.theclass.utcfromtimestamp]:
- self.assertEqual(fts(0.9999999), fts(1))
- self.assertEqual(fts(0.99999949).microsecond, 999999)
+ zero = fts(0)
+ self.assertEqual(zero.second, 0)
+ self.assertEqual(zero.microsecond, 0)
+ try:
+ minus_one = fts(-1e-6)
+ except OSError:
+ # localtime(-1) and gmtime(-1) is not supported on Windows
+ pass
+ else:
+ self.assertEqual(minus_one.second, 59)
+ self.assertEqual(minus_one.microsecond, 999999)
+
+ t = fts(-1e-8)
+ self.assertEqual(t, minus_one)
+ t = fts(-9e-7)
+ self.assertEqual(t, minus_one)
+ t = fts(-1e-7)
+ self.assertEqual(t, minus_one)
+
+ t = fts(1e-7)
+ self.assertEqual(t, zero)
+ t = fts(9e-7)
+ self.assertEqual(t, zero)
+ t = fts(0.99999949)
+ self.assertEqual(t.second, 0)
+ self.assertEqual(t.microsecond, 999999)
+ t = fts(0.9999999)
+ self.assertEqual(t.second, 0)
+ self.assertEqual(t.microsecond, 999999)
def test_insane_fromtimestamp(self):
# It's possible that some platform maps time_t to double,
@@ -1745,7 +1812,7 @@ class TestDateTime(TestDate):
# exempt such platforms (provided they return reasonable
# results!).
for insane in -1e200, 1e200:
- self.assertRaises(ValueError, self.theclass.fromtimestamp,
+ self.assertRaises(OverflowError, self.theclass.fromtimestamp,
insane)
def test_insane_utcfromtimestamp(self):
@@ -1754,7 +1821,7 @@ class TestDateTime(TestDate):
# exempt such platforms (provided they return reasonable
# results!).
for insane in -1e200, 1e200:
- self.assertRaises(ValueError, self.theclass.utcfromtimestamp,
+ self.assertRaises(OverflowError, self.theclass.utcfromtimestamp,
insane)
@unittest.skipIf(sys.platform == "win32", "Windows doesn't accept negative timestamps")
def test_negative_float_fromtimestamp(self):
@@ -1907,7 +1974,7 @@ class TestDateTime(TestDate):
# simply can't be applied to a naive object.
dt = self.theclass.now()
f = FixedOffset(44, "")
- self.assertRaises(TypeError, dt.astimezone) # not enough args
+ self.assertRaises(ValueError, dt.astimezone) # naive
self.assertRaises(TypeError, dt.astimezone, f, f) # too many args
self.assertRaises(TypeError, dt.astimezone, dt) # arg wrong type
self.assertRaises(ValueError, dt.astimezone, f) # naive
@@ -2479,7 +2546,7 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase):
self.assertEqual(t1, t2)
self.assertEqual(t1, t3)
self.assertEqual(t2, t3)
- self.assertRaises(TypeError, lambda: t4 == t5) # mixed tz-aware & naive
+ self.assertNotEqual(t4, t5) # mixed tz-aware & naive
self.assertRaises(TypeError, lambda: t4 < t5) # mixed tz-aware & naive
self.assertRaises(TypeError, lambda: t5 < t4) # mixed tz-aware & naive
@@ -2631,7 +2698,7 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase):
t2 = t2.replace(tzinfo=FixedOffset(None, ""))
self.assertEqual(t1, t2)
t2 = t2.replace(tzinfo=FixedOffset(0, ""))
- self.assertRaises(TypeError, lambda: t1 == t2)
+ self.assertNotEqual(t1, t2)
# In time w/ identical tzinfo objects, utcoffset is ignored.
class Varies(tzinfo):
@@ -2736,16 +2803,16 @@ class TestDateTimeTZ(TestDateTime, TZInfoBase, unittest.TestCase):
microsecond=1)
self.assertTrue(t1 > t2)
- # Make t2 naive and it should fail.
+ # Make t2 naive and it should differ.
t2 = self.theclass.min
- self.assertRaises(TypeError, lambda: t1 == t2)
+ self.assertNotEqual(t1, t2)
self.assertEqual(t2, t2)
# It's also naive if it has tzinfo but tzinfo.utcoffset() is None.
class Naive(tzinfo):
def utcoffset(self, dt): return None
t2 = self.theclass(5, 6, 7, tzinfo=Naive())
- self.assertRaises(TypeError, lambda: t1 == t2)
+ self.assertNotEqual(t1, t2)
self.assertEqual(t2, t2)
# OTOH, it's OK to compare two of these mixing the two ways of being
@@ -3188,8 +3255,6 @@ class TestDateTimeTZ(TestDateTime, TZInfoBase, unittest.TestCase):
self.assertTrue(dt.tzinfo is f44m)
# Replacing with degenerate tzinfo raises an exception.
self.assertRaises(ValueError, dt.astimezone, fnone)
- # Ditto with None tz.
- self.assertRaises(TypeError, dt.astimezone, None)
# Replacing with same tzinfo makes no change.
x = dt.astimezone(dt.tzinfo)
self.assertTrue(x.tzinfo is f44m)
@@ -3209,6 +3274,25 @@ class TestDateTimeTZ(TestDateTime, TZInfoBase, unittest.TestCase):
self.assertTrue(got.tzinfo is expected.tzinfo)
self.assertEqual(got, expected)
+ @support.run_with_tz('UTC')
+ def test_astimezone_default_utc(self):
+ dt = self.theclass.now(timezone.utc)
+ self.assertEqual(dt.astimezone(None), dt)
+ self.assertEqual(dt.astimezone(), dt)
+
+ # Note that offset in TZ variable has the opposite sign to that
+ # produced by %z directive.
+ @support.run_with_tz('EST+05EDT,M3.2.0,M11.1.0')
+ def test_astimezone_default_eastern(self):
+ dt = self.theclass(2012, 11, 4, 6, 30, tzinfo=timezone.utc)
+ local = dt.astimezone()
+ self.assertEqual(dt, local)
+ self.assertEqual(local.strftime("%z %Z"), "-0500 EST")
+ dt = self.theclass(2012, 11, 4, 5, 30, tzinfo=timezone.utc)
+ local = dt.astimezone()
+ self.assertEqual(dt, local)
+ self.assertEqual(local.strftime("%z %Z"), "-0400 EDT")
+
def test_aware_subtract(self):
cls = self.theclass
@@ -3262,7 +3346,7 @@ class TestDateTimeTZ(TestDateTime, TZInfoBase, unittest.TestCase):
t2 = t2.replace(tzinfo=FixedOffset(None, ""))
self.assertEqual(t1, t2)
t2 = t2.replace(tzinfo=FixedOffset(0, ""))
- self.assertRaises(TypeError, lambda: t1 == t2)
+ self.assertNotEqual(t1, t2)
# In datetime w/ identical tzinfo objects, utcoffset is ignored.
class Varies(tzinfo):
diff --git a/Lib/test/decimaltestdata/extra.decTest b/Lib/test/decimaltestdata/extra.decTest
index fe8b77a..b630d8e 100644
--- a/Lib/test/decimaltestdata/extra.decTest
+++ b/Lib/test/decimaltestdata/extra.decTest
@@ -222,12 +222,25 @@ extr1700 power 10 1e-999999999 -> 1.000000000000000 Inexact Rounded
extr1701 power 100.0 -557.71e-742888888 -> 1.000000000000000 Inexact Rounded
extr1702 power 10 1e-100 -> 1.000000000000000 Inexact Rounded
+-- Another one (see issue #12080). Thanks again to Stefan Krah.
+extr1703 power 4 -1.2e-999999999 -> 1.000000000000000 Inexact Rounded
+
-- A couple of interesting exact cases for power. Note that the specification
-- requires these to be reported as Inexact.
extr1710 power 1e375 56e-3 -> 1.000000000000000E+21 Inexact Rounded
extr1711 power 10000 0.75 -> 1000.000000000000 Inexact Rounded
extr1712 power 1e-24 0.875 -> 1.000000000000000E-21 Inexact Rounded
+-- Some more exact cases, exercising power with negative second argument.
+extr1720 power 400 -0.5 -> 0.05000000000000000 Inexact Rounded
+extr1721 power 4096 -0.75 -> 0.001953125000000000 Inexact Rounded
+extr1722 power 625e4 -0.25 -> 0.02000000000000000 Inexact Rounded
+
+-- Nonexact cases, to exercise some of the early exit conditions from
+-- _power_exact.
+extr1730 power 2048 -0.75 -> 0.003284751622084822 Inexact Rounded
+
+
-- Tests for the is_* boolean operations
precision: 9
maxExponent: 999
diff --git a/Lib/test/dh512.pem b/Lib/test/dh512.pem
new file mode 100644
index 0000000..200d16c
--- /dev/null
+++ b/Lib/test/dh512.pem
@@ -0,0 +1,9 @@
+-----BEGIN DH PARAMETERS-----
+MEYCQQD1Kv884bEpQBgRjXyEpwpy1obEAxnIByl6ypUM2Zafq9AKUJsCRtMIPWak
+XUGfnHy9iUsiGSa6q6Jew1XpKgVfAgEC
+-----END DH PARAMETERS-----
+
+These are the 512 bit DH parameters from "Assigned Number for SKIP Protocols"
+(http://www.skip-vpn.org/spec/numbers.html).
+See there for how they were generated.
+Note that g is not a generator, but this is not a problem since p is a safe prime.
diff --git a/Lib/test/exception_hierarchy.txt b/Lib/test/exception_hierarchy.txt
index 5037b33..1c1f69f 100644
--- a/Lib/test/exception_hierarchy.txt
+++ b/Lib/test/exception_hierarchy.txt
@@ -11,11 +11,6 @@ BaseException
+-- AssertionError
+-- AttributeError
+-- BufferError
- +-- EnvironmentError
- | +-- IOError
- | +-- OSError
- | +-- WindowsError (Windows)
- | +-- VMSError (VMS)
+-- EOFError
+-- ImportError
+-- LookupError
@@ -24,6 +19,22 @@ BaseException
+-- MemoryError
+-- NameError
| +-- UnboundLocalError
+ +-- OSError
+ | +-- BlockingIOError
+ | +-- ChildProcessError
+ | +-- ConnectionError
+ | | +-- BrokenPipeError
+ | | +-- ConnectionAbortedError
+ | | +-- ConnectionRefusedError
+ | | +-- ConnectionResetError
+ | +-- FileExistsError
+ | +-- FileNotFoundError
+ | +-- InterruptedError
+ | +-- IsADirectoryError
+ | +-- NotADirectoryError
+ | +-- PermissionError
+ | +-- ProcessLookupError
+ | +-- TimeoutError
+-- ReferenceError
+-- RuntimeError
| +-- NotImplementedError
diff --git a/Lib/test/fork_wait.py b/Lib/test/fork_wait.py
index 1caab1c..88527df 100644
--- a/Lib/test/fork_wait.py
+++ b/Lib/test/fork_wait.py
@@ -43,6 +43,7 @@ class ForkWait(unittest.TestCase):
self.assertEqual(spid, cpid)
self.assertEqual(status, 0, "cause = %d, exit = %d" % (status&0xff, status>>8))
+ @support.reap_threads
def test_wait(self):
for i in range(NUM_THREADS):
_thread.start_new(self.f, (i,))
@@ -69,7 +70,8 @@ class ForkWait(unittest.TestCase):
os._exit(n)
else:
# Parent
- self.wait_impl(cpid)
- # Tell threads to die
- self.stop = 1
- time.sleep(2*SHORTSLEEP) # Wait for threads to die
+ try:
+ self.wait_impl(cpid)
+ finally:
+ # Tell threads to die
+ self.stop = 1
diff --git a/Lib/test/test_future1.py b/Lib/test/future_test1.py
index 297c2e0..297c2e0 100644
--- a/Lib/test/test_future1.py
+++ b/Lib/test/future_test1.py
diff --git a/Lib/test/test_future2.py b/Lib/test/future_test2.py
index 3d7fc86..3d7fc86 100644
--- a/Lib/test/test_future2.py
+++ b/Lib/test/future_test2.py
diff --git a/Lib/test/json_tests/test_dump.py b/Lib/test/json_tests/test_dump.py
index 083c11f..4b3386f 100644
--- a/Lib/test/json_tests/test_dump.py
+++ b/Lib/test/json_tests/test_dump.py
@@ -1,6 +1,7 @@
from io import StringIO
from test.json_tests import PyTest, CTest
+from test.support import bigmemtest, _1G
class TestDump:
def test_dump(self):
@@ -21,4 +22,20 @@ class TestDump:
class TestPyDump(TestDump, PyTest): pass
-class TestCDump(TestDump, CTest): pass
+
+class TestCDump(TestDump, CTest):
+
+ # The size requirement here is hopefully over-estimated (actual
+ # memory consumption depending on implementation details, and also
+ # system memory management, since this may allocate a lot of
+ # small objects).
+
+ @bigmemtest(size=_1G, memuse=1)
+ def test_large_list(self, size):
+ N = int(30 * 1024 * 1024 * (size / _1G))
+ l = [1] * N
+ encoded = self.dumps(l)
+ self.assertEqual(len(encoded), N * 3)
+ self.assertEqual(encoded[:1], "[")
+ self.assertEqual(encoded[-2:], "1]")
+ self.assertEqual(encoded[1:-2], "1, " * (N - 1))
diff --git a/Lib/test/json_tests/test_scanstring.py b/Lib/test/json_tests/test_scanstring.py
index f82cdee..426c8dd 100644
--- a/Lib/test/json_tests/test_scanstring.py
+++ b/Lib/test/json_tests/test_scanstring.py
@@ -9,14 +9,9 @@ class TestScanstring:
scanstring('"z\\ud834\\udd20x"', 1, True),
('z\U0001d120x', 16))
- if sys.maxunicode == 65535:
- self.assertEqual(
- scanstring('"z\U0001d120x"', 1, True),
- ('z\U0001d120x', 6))
- else:
- self.assertEqual(
- scanstring('"z\U0001d120x"', 1, True),
- ('z\U0001d120x', 5))
+ self.assertEqual(
+ scanstring('"z\U0001d120x"', 1, True),
+ ('z\U0001d120x', 5))
self.assertEqual(
scanstring('"\\u007b"', 1, True),
diff --git a/Lib/test/keycert.passwd.pem b/Lib/test/keycert.passwd.pem
new file mode 100644
index 0000000..e905748
--- /dev/null
+++ b/Lib/test/keycert.passwd.pem
@@ -0,0 +1,33 @@
+-----BEGIN RSA PRIVATE KEY-----
+Proc-Type: 4,ENCRYPTED
+DEK-Info: DES-EDE3-CBC,1A8D9D2A02EC698A
+
+kJYbfZ8L0sfe9Oty3gw0aloNnY5E8fegRfQLZlNoxTl6jNt0nIwI8kDJ36CZgR9c
+u3FDJm/KqrfUoz8vW+qEnWhSG7QPX2wWGPHd4K94Yz/FgrRzZ0DoK7XxXq9gOtVA
+AVGQhnz32p+6WhfGsCr9ArXEwRZrTk/FvzEPaU5fHcoSkrNVAGX8IpSVkSDwEDQr
+Gv17+cfk99UV1OCza6yKHoFkTtrC+PZU71LomBabivS2Oc4B9hYuSR2hF01wTHP+
+YlWNagZOOVtNz4oKK9x9eNQpmfQXQvPPTfusexKIbKfZrMvJoxcm1gfcZ0H/wK6P
+6wmXSG35qMOOztCZNtperjs1wzEBXznyK8QmLcAJBjkfarABJX9vBEzZV0OUKhy+
+noORFwHTllphbmydLhu6ehLUZMHPhzAS5UN7srtpSN81eerDMy0RMUAwA7/PofX1
+94Me85Q8jP0PC9ETdsJcPqLzAPETEYu0ELewKRcrdyWi+tlLFrpE5KT/s5ecbl9l
+7B61U4Kfd1PIXc/siINhU3A3bYK+845YyUArUOnKf1kEox7p1RpD7yFqVT04lRTo
+cibNKATBusXSuBrp2G6GNuhWEOSafWCKJQAzgCYIp6ZTV2khhMUGppc/2H3CF6cO
+zX0KtlPVZC7hLkB6HT8SxYUwF1zqWY7+/XPPdc37MeEZ87Q3UuZwqORLY+Z0hpgt
+L5JXBCoklZhCAaN2GqwFLXtGiRSRFGY7xXIhbDTlE65Wv1WGGgDLMKGE1gOz3yAo
+2jjG1+yAHJUdE69XTFHSqSkvaloA1W03LdMXZ9VuQJ/ySXCie6ABAQ==
+-----END RSA PRIVATE KEY-----
+-----BEGIN CERTIFICATE-----
+MIICVDCCAb2gAwIBAgIJANfHOBkZr8JOMA0GCSqGSIb3DQEBBQUAMF8xCzAJBgNV
+BAYTAlhZMRcwFQYDVQQHEw5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9u
+IFNvZnR3YXJlIEZvdW5kYXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDAeFw0xMDEw
+MDgyMzAxNTZaFw0yMDEwMDUyMzAxNTZaMF8xCzAJBgNVBAYTAlhZMRcwFQYDVQQH
+Ew5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9uIFNvZnR3YXJlIEZvdW5k
+YXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDCBnzANBgkqhkiG9w0BAQEFAAOBjQAw
+gYkCgYEA21vT5isq7F68amYuuNpSFlKDPrMUCa4YWYqZRt2OZ+/3NKaZ2xAiSwr7
+6MrQF70t5nLbSPpqE5+5VrS58SY+g/sXLiFd6AplH1wJZwh78DofbFYXUggktFMt
+pTyiX8jtP66bkcPkDADA089RI1TQR6Ca+n7HFa7c1fabVV6i3zkCAwEAAaMYMBYw
+FAYDVR0RBA0wC4IJbG9jYWxob3N0MA0GCSqGSIb3DQEBBQUAA4GBAHPctQBEQ4wd
+BJ6+JcpIraopLn8BGhbjNWj40mmRqWB/NAWF6M5ne7KpGAu7tLeG4hb1zLaldK8G
+lxy2GPSRF6LFS48dpEj2HbMv2nvv6xxalDMJ9+DicWgAKTQ6bcX2j3GUkCR0g/T1
+CRlNBAAlvhKzO7Clpf9l0YKBEfraJByX
+-----END CERTIFICATE-----
diff --git a/Lib/test/list_tests.py b/Lib/test/list_tests.py
index be054ea..42e118b 100644
--- a/Lib/test/list_tests.py
+++ b/Lib/test/list_tests.py
@@ -418,6 +418,47 @@ class CommonTest(seq_tests.CommonTest):
self.assertRaises(TypeError, u.reverse, 42)
+ def test_clear(self):
+ u = self.type2test([2, 3, 4])
+ u.clear()
+ self.assertEqual(u, [])
+
+ u = self.type2test([])
+ u.clear()
+ self.assertEqual(u, [])
+
+ u = self.type2test([])
+ u.append(1)
+ u.clear()
+ u.append(2)
+ self.assertEqual(u, [2])
+
+ self.assertRaises(TypeError, u.clear, None)
+
+ def test_copy(self):
+ u = self.type2test([1, 2, 3])
+ v = u.copy()
+ self.assertEqual(v, [1, 2, 3])
+
+ u = self.type2test([])
+ v = u.copy()
+ self.assertEqual(v, [])
+
+ # test that it's indeed a copy and not a reference
+ u = self.type2test(['a', 'b'])
+ v = u.copy()
+ v.append('i')
+ self.assertEqual(u, ['a', 'b'])
+ self.assertEqual(v, u + ['i'])
+
+ # test that it's a shallow, not a deep copy
+ u = self.type2test([1, 2, [3, 4], 5])
+ v = u.copy()
+ self.assertEqual(u, v)
+ self.assertIs(v[3], u[3])
+
+ self.assertRaises(TypeError, u.copy, None)
+
def test_sort(self):
u = self.type2test([1, 0])
u.sort()
diff --git a/Lib/test/lock_tests.py b/Lib/test/lock_tests.py
index 094cc7a..bfbf44e 100644
--- a/Lib/test/lock_tests.py
+++ b/Lib/test/lock_tests.py
@@ -4,7 +4,7 @@ Various tests for synchronization primitives.
import sys
import time
-from _thread import start_new_thread, get_ident, TIMEOUT_MAX
+from _thread import start_new_thread, TIMEOUT_MAX
import threading
import unittest
@@ -31,7 +31,7 @@ class Bunch(object):
self.finished = []
self._can_exit = not wait_before_exit
def task():
- tid = get_ident()
+ tid = threading.get_ident()
self.started.append(tid)
try:
f()
@@ -255,6 +255,18 @@ class RLockTests(BaseLockTests):
lock.release()
self.assertRaises(RuntimeError, lock.release)
+ def test_release_save_unacquired(self):
+ # Cannot _release_save an unacquired lock
+ lock = self.locktype()
+ self.assertRaises(RuntimeError, lock._release_save)
+ lock.acquire()
+ lock.acquire()
+ lock.release()
+ lock.acquire()
+ lock.release()
+ lock.release()
+ self.assertRaises(RuntimeError, lock._release_save)
+
def test_different_thread(self):
# Cannot release from a different thread
lock = self.locktype()
diff --git a/Lib/test/mailcap.txt b/Lib/test/mailcap.txt
new file mode 100644
index 0000000..f61135d
--- /dev/null
+++ b/Lib/test/mailcap.txt
@@ -0,0 +1,39 @@
+# Mailcap file for test_mailcap; based on RFC 1524
+# Referred to by test_mailcap.py
+
+#
+# This is a comment.
+#
+
+application/frame; showframe %s; print="cat %s | lp"
+application/postscript; ps-to-terminal %s;\
+ needsterminal
+application/postscript; ps-to-terminal %s; \
+ compose=idraw %s
+application/x-dvi; xdvi %s
+application/x-movie; movieplayer %s; compose=moviemaker %s; \
+ description="Movie"; \
+ x11-bitmap="/usr/lib/Zmail/bitmaps/movie.xbm"
+application/*; echo "This is \"%t\" but \
+ is 50 \% Greek to me" \; cat %s; copiousoutput
+
+audio/basic; showaudio %s; compose=audiocompose %s; edit=audiocompose %s;\
+description="An audio fragment"
+audio/* ; /usr/local/bin/showaudio %t
+
+image/rgb; display %s
+#image/gif; display %s
+image/x-xwindowdump; display %s
+
+# The continuation char shouldn't \
+# make a difference in a comment.
+
+message/external-body; showexternal %s %{access-type} %{name} %{site} \
+ %{directory} %{mode} %{server}; needsterminal; composetyped = extcompose %s; \
+ description="A reference to data stored in an external location"
+
+text/richtext; shownonascii iso-8859-8 -e richtext -p %s; test=test "`echo \
+ %{charset} | tr '[A-Z]' '[a-z]'`" = iso-8859-8; copiousoutput
+
+video/mpeg; mpeg_play %s
+video/*; animate %s
diff --git a/Lib/test/math_testcases.txt b/Lib/test/math_testcases.txt
index 5e24335..9585188 100644
--- a/Lib/test/math_testcases.txt
+++ b/Lib/test/math_testcases.txt
@@ -517,3 +517,117 @@ expm10306 expm1 1.79e308 -> inf overflow
-- weaker version of expm10302
expm10307 expm1 709.5 -> 1.3549863193146328e+308
+
+-------------------------
+-- log2: log to base 2 --
+-------------------------
+
+-- special values
+log20000 log2 0.0 -> -inf divide-by-zero
+log20001 log2 -0.0 -> -inf divide-by-zero
+log20002 log2 inf -> inf
+log20003 log2 -inf -> nan invalid
+log20004 log2 nan -> nan
+
+-- exact value at 1.0
+log20010 log2 1.0 -> 0.0
+
+-- negatives
+log20020 log2 -5e-324 -> nan invalid
+log20021 log2 -1.0 -> nan invalid
+log20022 log2 -1.7e-308 -> nan invalid
+
+-- exact values at powers of 2
+log20100 log2 2.0 -> 1.0
+log20101 log2 4.0 -> 2.0
+log20102 log2 8.0 -> 3.0
+log20103 log2 16.0 -> 4.0
+log20104 log2 32.0 -> 5.0
+log20105 log2 64.0 -> 6.0
+log20106 log2 128.0 -> 7.0
+log20107 log2 256.0 -> 8.0
+log20108 log2 512.0 -> 9.0
+log20109 log2 1024.0 -> 10.0
+log20110 log2 2048.0 -> 11.0
+
+log20200 log2 0.5 -> -1.0
+log20201 log2 0.25 -> -2.0
+log20202 log2 0.125 -> -3.0
+log20203 log2 0.0625 -> -4.0
+
+-- values close to 1.0
+log20300 log2 1.0000000000000002 -> 3.2034265038149171e-16
+log20301 log2 1.0000000001 -> 1.4426951601859516e-10
+log20302 log2 1.00001 -> 1.4426878274712997e-5
+
+log20310 log2 0.9999999999999999 -> -1.6017132519074588e-16
+log20311 log2 0.9999999999 -> -1.4426951603302210e-10
+log20312 log2 0.99999 -> -1.4427022544056922e-5
+
+-- tiny values
+log20400 log2 5e-324 -> -1074.0
+log20401 log2 1e-323 -> -1073.0
+log20402 log2 1.5e-323 -> -1072.4150374992789
+log20403 log2 2e-323 -> -1072.0
+
+log20410 log2 1e-308 -> -1023.1538532253076
+log20411 log2 2.2250738585072014e-308 -> -1022.0
+log20412 log2 4.4501477170144028e-308 -> -1021.0
+log20413 log2 1e-307 -> -1019.8319251304202
+
+-- huge values
+log20500 log2 1.7976931348623157e+308 -> 1024.0
+log20501 log2 1.7e+308 -> 1023.9193879716706
+log20502 log2 8.9884656743115795e+307 -> 1023.0
+
+-- selection of random values
+log20600 log2 -7.2174324841039838e+289 -> nan invalid
+log20601 log2 -2.861319734089617e+265 -> nan invalid
+log20602 log2 -4.3507646894008962e+257 -> nan invalid
+log20603 log2 -6.6717265307520224e+234 -> nan invalid
+log20604 log2 -3.9118023786619294e+229 -> nan invalid
+log20605 log2 -1.5478221302505161e+206 -> nan invalid
+log20606 log2 -1.4380485131364602e+200 -> nan invalid
+log20607 log2 -3.7235198730382645e+185 -> nan invalid
+log20608 log2 -1.0472242235095724e+184 -> nan invalid
+log20609 log2 -5.0141781956163884e+160 -> nan invalid
+log20610 log2 -2.1157958031160324e+124 -> nan invalid
+log20611 log2 -7.9677558612567718e+90 -> nan invalid
+log20612 log2 -5.5553906194063732e+45 -> nan invalid
+log20613 log2 -16573900952607.953 -> nan invalid
+log20614 log2 -37198371019.888618 -> nan invalid
+log20615 log2 -6.0727115121422674e-32 -> nan invalid
+log20616 log2 -2.5406841656526057e-38 -> nan invalid
+log20617 log2 -4.9056766703267657e-43 -> nan invalid
+log20618 log2 -2.1646786075228305e-71 -> nan invalid
+log20619 log2 -2.470826790488573e-78 -> nan invalid
+log20620 log2 -3.8661709303489064e-165 -> nan invalid
+log20621 log2 -1.0516496976649986e-182 -> nan invalid
+log20622 log2 -1.5935458614317996e-255 -> nan invalid
+log20623 log2 -2.8750977267336654e-293 -> nan invalid
+log20624 log2 -7.6079466794732585e-296 -> nan invalid
+log20625 log2 3.2073253539988545e-307 -> -1018.1505544209213
+log20626 log2 1.674937885472249e-244 -> -809.80634755783126
+log20627 log2 1.0911259044931283e-214 -> -710.76679472274213
+log20628 log2 2.0275372624809709e-154 -> -510.55719818383272
+log20629 log2 7.3926087369631841e-115 -> -379.13564735312292
+log20630 log2 1.3480198206342423e-86 -> -285.25497445094436
+log20631 log2 8.9927384655719947e-83 -> -272.55127136401637
+log20632 log2 3.1452398713597487e-60 -> -197.66251564496875
+log20633 log2 7.0706573215457351e-55 -> -179.88420087782217
+log20634 log2 3.1258285390731669e-49 -> -161.13023800505653
+log20635 log2 8.2253046627829942e-41 -> -133.15898277355879
+log20636 log2 7.8691367397519897e+49 -> 165.75068202732419
+log20637 log2 2.9920561983925013e+64 -> 214.18453534573757
+log20638 log2 4.7827254553946841e+77 -> 258.04629628445673
+log20639 log2 3.1903566496481868e+105 -> 350.47616767491166
+log20640 log2 5.6195082449502419e+113 -> 377.86831861008250
+log20641 log2 9.9625658250651047e+125 -> 418.55752921228753
+log20642 log2 2.7358945220961532e+145 -> 483.13158636923413
+log20643 log2 2.785842387926931e+174 -> 579.49360214860280
+log20644 log2 2.4169172507252751e+193 -> 642.40529039289652
+log20645 log2 3.1689091206395632e+205 -> 682.65924573798395
+log20646 log2 2.535995592365391e+208 -> 692.30359597460460
+log20647 log2 6.2011236566089916e+233 -> 776.64177576730913
+log20648 log2 2.1843274820677632e+253 -> 841.57499717289647
+log20649 log2 8.7493931063474791e+297 -> 989.74182713073981
diff --git a/Lib/test/memory_watchdog.py b/Lib/test/memory_watchdog.py
new file mode 100644
index 0000000..88cca8d
--- /dev/null
+++ b/Lib/test/memory_watchdog.py
@@ -0,0 +1,28 @@
+"""Memory watchdog: periodically read the memory usage of the main test process
+and print it out, until terminated."""
+# stdin should refer to the process' /proc/<PID>/statm: we don't pass the
+# process' PID to avoid a race condition in case of - unlikely - PID recycling.
+# If the process crashes, reading from the /proc entry will fail with ESRCH.
+
+
+import os
+import sys
+import time
+
+
+try:
+ page_size = os.sysconf('SC_PAGESIZE')
+except (ValueError, AttributeError):
+ try:
+ page_size = os.sysconf('SC_PAGE_SIZE')
+ except (ValueError, AttributeError):
+ page_size = 4096
+
+while True:
+ sys.stdin.seek(0)
+ statm = sys.stdin.read()
+ data = int(statm.split()[5])
+ sys.stdout.write(" ... process data size: {data:.1f}G\n"
+ .format(data=data * page_size / (1024 ** 3)))
+ sys.stdout.flush()
+ time.sleep(1)
diff --git a/Lib/test/mock_socket.py b/Lib/test/mock_socket.py
index 8036932..d09e78c 100644
--- a/Lib/test/mock_socket.py
+++ b/Lib/test/mock_socket.py
@@ -106,7 +106,8 @@ def socket(family=None, type=None, proto=None):
return MockSocket()
-def create_connection(address, timeout=socket_module._GLOBAL_DEFAULT_TIMEOUT):
+def create_connection(address, timeout=socket_module._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None):
try:
int_port = int(address[1])
except ValueError:
diff --git a/Lib/test/test_multibytecodec_support.py b/Lib/test/multibytecodec_support.py
index ef63b69..a2c57ea 100644
--- a/Lib/test/test_multibytecodec_support.py
+++ b/Lib/test/multibytecodec_support.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
#
-# test_multibytecodec_support.py
+# multibytecodec_support.py
# Common Unittest Routines for CJK codecs
#
@@ -264,21 +264,6 @@ class TestBase:
self.assertEqual(ostream.getvalue(), self.tstring[0])
-if len('\U00012345') == 2: # ucs2 build
- _unichr = chr
- def chr(v):
- if v >= 0x10000:
- return _unichr(0xd800 + ((v - 0x10000) >> 10)) + \
- _unichr(0xdc00 + ((v - 0x10000) & 0x3ff))
- else:
- return _unichr(v)
- _ord = ord
- def ord(c):
- if len(c) == 2:
- return 0x10000 + ((_ord(c[0]) - 0xd800) << 10) + \
- (ord(c[1]) - 0xdc00)
- else:
- return _ord(c)
class TestBase_Mapping(unittest.TestCase):
pass_enctest = []
diff --git a/Lib/test/namespace_pkgs/both_portions/foo/one.py b/Lib/test/namespace_pkgs/both_portions/foo/one.py
new file mode 100644
index 0000000..3080f6f
--- /dev/null
+++ b/Lib/test/namespace_pkgs/both_portions/foo/one.py
@@ -0,0 +1 @@
+attr = 'both_portions foo one'
diff --git a/Lib/test/namespace_pkgs/both_portions/foo/two.py b/Lib/test/namespace_pkgs/both_portions/foo/two.py
new file mode 100644
index 0000000..4131d3d
--- /dev/null
+++ b/Lib/test/namespace_pkgs/both_portions/foo/two.py
@@ -0,0 +1 @@
+attr = 'both_portions foo two'
diff --git a/Lib/test/namespace_pkgs/missing_directory.zip b/Lib/test/namespace_pkgs/missing_directory.zip
new file mode 100644
index 0000000..836a910
--- /dev/null
+++ b/Lib/test/namespace_pkgs/missing_directory.zip
Binary files differ
diff --git a/Lib/test/namespace_pkgs/module_and_namespace_package/a_test.py b/Lib/test/namespace_pkgs/module_and_namespace_package/a_test.py
new file mode 100644
index 0000000..43cbedb
--- /dev/null
+++ b/Lib/test/namespace_pkgs/module_and_namespace_package/a_test.py
@@ -0,0 +1 @@
+attr = 'in module'
diff --git a/Lib/importlib/test/__init__.py b/Lib/test/namespace_pkgs/module_and_namespace_package/a_test/empty
index e69de29..e69de29 100644
--- a/Lib/importlib/test/__init__.py
+++ b/Lib/test/namespace_pkgs/module_and_namespace_package/a_test/empty
diff --git a/Lib/test/namespace_pkgs/nested_portion1.zip b/Lib/test/namespace_pkgs/nested_portion1.zip
new file mode 100644
index 0000000..8d22406
--- /dev/null
+++ b/Lib/test/namespace_pkgs/nested_portion1.zip
Binary files differ
diff --git a/Lib/email/test/__init__.py b/Lib/test/namespace_pkgs/not_a_namespace_pkg/foo/__init__.py
index e69de29..e69de29 100644
--- a/Lib/email/test/__init__.py
+++ b/Lib/test/namespace_pkgs/not_a_namespace_pkg/foo/__init__.py
diff --git a/Lib/test/namespace_pkgs/not_a_namespace_pkg/foo/one.py b/Lib/test/namespace_pkgs/not_a_namespace_pkg/foo/one.py
new file mode 100644
index 0000000..d8f5c83
--- /dev/null
+++ b/Lib/test/namespace_pkgs/not_a_namespace_pkg/foo/one.py
@@ -0,0 +1 @@
+attr = 'portion1 foo one'
diff --git a/Lib/test/namespace_pkgs/portion1/foo/one.py b/Lib/test/namespace_pkgs/portion1/foo/one.py
new file mode 100644
index 0000000..d8f5c83
--- /dev/null
+++ b/Lib/test/namespace_pkgs/portion1/foo/one.py
@@ -0,0 +1 @@
+attr = 'portion1 foo one'
diff --git a/Lib/test/namespace_pkgs/portion2/foo/two.py b/Lib/test/namespace_pkgs/portion2/foo/two.py
new file mode 100644
index 0000000..d092e1e
--- /dev/null
+++ b/Lib/test/namespace_pkgs/portion2/foo/two.py
@@ -0,0 +1 @@
+attr = 'portion2 foo two'
diff --git a/Lib/test/namespace_pkgs/project1/parent/child/one.py b/Lib/test/namespace_pkgs/project1/parent/child/one.py
new file mode 100644
index 0000000..2776fcd
--- /dev/null
+++ b/Lib/test/namespace_pkgs/project1/parent/child/one.py
@@ -0,0 +1 @@
+attr = 'parent child one'
diff --git a/Lib/test/namespace_pkgs/project2/parent/child/two.py b/Lib/test/namespace_pkgs/project2/parent/child/two.py
new file mode 100644
index 0000000..8b037bc
--- /dev/null
+++ b/Lib/test/namespace_pkgs/project2/parent/child/two.py
@@ -0,0 +1 @@
+attr = 'parent child two'
diff --git a/Lib/test/namespace_pkgs/project3/parent/child/three.py b/Lib/test/namespace_pkgs/project3/parent/child/three.py
new file mode 100644
index 0000000..f8abfe1
--- /dev/null
+++ b/Lib/test/namespace_pkgs/project3/parent/child/three.py
@@ -0,0 +1 @@
+attr = 'parent child three'
diff --git a/Lib/test/namespace_pkgs/top_level_portion1.zip b/Lib/test/namespace_pkgs/top_level_portion1.zip
new file mode 100644
index 0000000..3b866c9
--- /dev/null
+++ b/Lib/test/namespace_pkgs/top_level_portion1.zip
Binary files differ
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 4d491b0..fb04830 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -4,10 +4,11 @@ import pickle
import pickletools
import sys
import copyreg
+import weakref
from http.cookies import SimpleCookie
from test.support import (
- TestFailed, TESTFN, run_with_locale,
+ TestFailed, TESTFN, run_with_locale, no_tracing,
_2G, _4G, bigmemtest,
)
@@ -18,7 +19,7 @@ from pickle import bytes_types
# kind of outer loop.
protocols = range(pickle.HIGHEST_PROTOCOL + 1)
-character_size = 4 if sys.maxunicode > 0xFFFF else 2
+ascii_char_size = 1
# Return True if opcode code appears in the pickle, else False.
@@ -747,6 +748,18 @@ class AbstractPickleTests(unittest.TestCase):
u = self.loads(s)
self.assertEqual(t, u)
+ def test_ellipsis(self):
+ for proto in protocols:
+ s = self.dumps(..., proto)
+ u = self.loads(s)
+ self.assertEqual(..., u)
+
+ def test_notimplemented(self):
+ for proto in protocols:
+ s = self.dumps(NotImplemented, proto)
+ u = self.loads(s)
+ self.assertEqual(NotImplemented, u)
+
# Tests for protocol 2
def test_proto(self):
@@ -880,6 +893,25 @@ class AbstractPickleTests(unittest.TestCase):
self.assertEqual(B(x), B(y), detail)
self.assertEqual(x.__dict__, y.__dict__, detail)
+ def test_newobj_proxies(self):
+ # NEWOBJ should use the __class__ rather than the raw type
+ classes = myclasses[:]
+ # Cannot create weakproxies to these classes
+ for c in (MyInt, MyTuple):
+ classes.remove(c)
+ for proto in protocols:
+ for C in classes:
+ B = C.__base__
+ x = C(C.sample)
+ x.foo = 42
+ p = weakref.proxy(x)
+ s = self.dumps(p, proto)
+ y = self.loads(s)
+ self.assertEqual(type(y), type(x)) # rather than type(p)
+ detail = (proto, C, B, x, y, type(y))
+ self.assertEqual(B(x), B(y), detail)
+ self.assertEqual(x.__dict__, y.__dict__, detail)
+
# Register a type with copyreg, with extension code extcode. Pickle
# an object of that type. Check that the resulting pickle uses opcode
# (EXT[124]) under proto 2, and not in proto 1.
@@ -1040,13 +1072,13 @@ class AbstractPickleTests(unittest.TestCase):
y = self.loads(s)
self.assertEqual(y._reduce_called, 1)
+ @no_tracing
def test_bad_getattr(self):
x = BadGetattr()
for proto in 0, 1:
self.assertRaises(RuntimeError, self.dumps, x, proto)
# protocol 2 don't raise a RuntimeError.
d = self.dumps(x, 2)
- self.assertRaises(RuntimeError, self.loads, d)
def test_reduce_bad_iterator(self):
# Issue4176: crash when 4th and 5th items of __reduce__()
@@ -1136,6 +1168,15 @@ class AbstractPickleTests(unittest.TestCase):
empty = self.loads(b'\x80\x03U\x00q\x00.', encoding='koi8-r')
self.assertEqual(empty, '')
+ def test_int_pickling_efficiency(self):
+ # Test compactness of int representation (see issue #12744)
+ for proto in protocols:
+ sizes = [len(self.dumps(2**n, proto)) for n in range(70)]
+ # the size function is monotonic
+ self.assertEqual(sorted(sizes), sizes)
+ if proto >= 2:
+ self.assertLessEqual(sizes[-1], 14)
+
def check_negative_32b_binXXX(self, dumped):
if sys.maxsize > 2**32:
self.skipTest("test is only meaningful on 32-bit builds")
@@ -1217,7 +1258,7 @@ class BigmemPickleTests(unittest.TestCase):
# All protocols use 1-byte per printable ASCII character; we add another
# byte because the encoded form has to be copied into the internal buffer.
- @bigmemtest(size=_2G, memuse=2 + character_size, dry_run=False)
+ @bigmemtest(size=_2G, memuse=2 + ascii_char_size, dry_run=False)
def test_huge_str_32b(self, size):
data = "abcd" * (size // 4)
try:
@@ -1234,7 +1275,7 @@ class BigmemPickleTests(unittest.TestCase):
# BINUNICODE (protocols 1, 2 and 3) cannot carry more than
# 2**32 - 1 bytes of utf-8 encoded unicode.
- @bigmemtest(size=_4G, memuse=1 + character_size, dry_run=False)
+ @bigmemtest(size=_4G, memuse=1 + ascii_char_size, dry_run=False)
def test_huge_str_64b(self, size):
data = "a" * size
try:
@@ -1578,6 +1619,105 @@ class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):
self.assertEqual(unpickler.load(), data)
+# Tests for dispatch_table attribute
+
+REDUCE_A = 'reduce_A'
+
+class AAA(object):
+ def __reduce__(self):
+ return str, (REDUCE_A,)
+
+class BBB(object):
+ pass
+
+class AbstractDispatchTableTests(unittest.TestCase):
+
+ def test_default_dispatch_table(self):
+ # No dispatch_table attribute by default
+ f = io.BytesIO()
+ p = self.pickler_class(f, 0)
+ with self.assertRaises(AttributeError):
+ p.dispatch_table
+ self.assertFalse(hasattr(p, 'dispatch_table'))
+
+ def test_class_dispatch_table(self):
+ # A dispatch_table attribute can be specified class-wide
+ dt = self.get_dispatch_table()
+
+ class MyPickler(self.pickler_class):
+ dispatch_table = dt
+
+ def dumps(obj, protocol=None):
+ f = io.BytesIO()
+ p = MyPickler(f, protocol)
+ self.assertEqual(p.dispatch_table, dt)
+ p.dump(obj)
+ return f.getvalue()
+
+ self._test_dispatch_table(dumps, dt)
+
+ def test_instance_dispatch_table(self):
+ # A dispatch_table attribute can also be specified instance-wide
+ dt = self.get_dispatch_table()
+
+ def dumps(obj, protocol=None):
+ f = io.BytesIO()
+ p = self.pickler_class(f, protocol)
+ p.dispatch_table = dt
+ self.assertEqual(p.dispatch_table, dt)
+ p.dump(obj)
+ return f.getvalue()
+
+ self._test_dispatch_table(dumps, dt)
+
+ def _test_dispatch_table(self, dumps, dispatch_table):
+ def custom_load_dump(obj):
+ return pickle.loads(dumps(obj, 0))
+
+ def default_load_dump(obj):
+ return pickle.loads(pickle.dumps(obj, 0))
+
+ # pickling complex numbers using protocol 0 relies on copyreg
+ # so check pickling a complex number still works
+ z = 1 + 2j
+ self.assertEqual(custom_load_dump(z), z)
+ self.assertEqual(default_load_dump(z), z)
+
+ # modify pickling of complex
+ REDUCE_1 = 'reduce_1'
+ def reduce_1(obj):
+ return str, (REDUCE_1,)
+ dispatch_table[complex] = reduce_1
+ self.assertEqual(custom_load_dump(z), REDUCE_1)
+ self.assertEqual(default_load_dump(z), z)
+
+ # check picklability of AAA and BBB
+ a = AAA()
+ b = BBB()
+ self.assertEqual(custom_load_dump(a), REDUCE_A)
+ self.assertIsInstance(custom_load_dump(b), BBB)
+ self.assertEqual(default_load_dump(a), REDUCE_A)
+ self.assertIsInstance(default_load_dump(b), BBB)
+
+ # modify pickling of BBB
+ dispatch_table[BBB] = reduce_1
+ self.assertEqual(custom_load_dump(a), REDUCE_A)
+ self.assertEqual(custom_load_dump(b), REDUCE_1)
+ self.assertEqual(default_load_dump(a), REDUCE_A)
+ self.assertIsInstance(default_load_dump(b), BBB)
+
+ # revert pickling of BBB and modify pickling of AAA
+ REDUCE_2 = 'reduce_2'
+ def reduce_2(obj):
+ return str, (REDUCE_2,)
+ dispatch_table[AAA] = reduce_2
+ del dispatch_table[BBB]
+ self.assertEqual(custom_load_dump(a), REDUCE_2)
+ self.assertIsInstance(custom_load_dump(b), BBB)
+ self.assertEqual(default_load_dump(a), REDUCE_A)
+ self.assertIsInstance(default_load_dump(b), BBB)
+
+
if __name__ == "__main__":
# Print some stuff that can be used to rewrite DATA{0,1,2}
from pickletools import dis
diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py
index 29f2bf0..e977e42 100755
--- a/Lib/test/regrtest.py
+++ b/Lib/test/regrtest.py
@@ -20,6 +20,11 @@ python -E -Wd -m test [options] [test_name1 ...]
Options:
-h/--help -- print this text and exit
+--timeout TIMEOUT
+ -- dump the traceback and exit if a test takes more
+ than TIMEOUT seconds; disabled if TIMEOUT is negative
+ or equals to zero
+--wait -- wait for user input, e.g., allow a debugger to be attached
Verbosity
@@ -44,6 +49,9 @@ Selecting tests
-- specify which special resource intensive tests to run
-M/--memlimit LIMIT
-- run very large memory-consuming tests
+ --testdir DIR
+ -- execute test files in the specified directory (instead
+ of the Python stdlib test suite)
Special runs
@@ -125,6 +133,8 @@ resources to test. Currently only the following are defined:
all - Enable all special resources.
+ none - Disable all special resources (this is the default).
+
audio - Tests that use the audio device. (There are known
cases of broken audio drivers that can crash Python or
even the Linux kernel.)
@@ -155,8 +165,11 @@ example, to run all the tests except for the gui tests, give the
option '-uall,-gui'.
"""
+# We import importlib *ASAP* in order to test #15386
+import importlib
+
import builtins
-import errno
+import faulthandler
import getopt
import io
import json
@@ -166,6 +179,7 @@ import platform
import random
import re
import shutil
+import signal
import sys
import sysconfig
import tempfile
@@ -225,6 +239,7 @@ ENV_CHANGED = -1
SKIPPED = -2
RESOURCE_DENIED = -3
INTERRUPTED = -4
+CHILD_ERROR = -5 # error in a child process
from test import support
@@ -268,6 +283,18 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
on the command line.
"""
+ # Display the Python traceback on fatal errors (e.g. segfault)
+ faulthandler.enable(all_threads=True)
+
+ # Display the Python traceback on SIGALRM or SIGUSR1 signal
+ signals = []
+ if hasattr(signal, 'SIGALRM'):
+ signals.append(signal.SIGALRM)
+ if hasattr(signal, 'SIGUSR1'):
+ signals.append(signal.SIGUSR1)
+ for signum in signals:
+ faulthandler.register(signum, chain=True)
+
replace_stdout()
support.record_original_stdout(sys.stdout)
@@ -278,7 +305,8 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
'use=', 'threshold=', 'coverdir=', 'nocoverdir',
'runleaks', 'huntrleaks=', 'memlimit=', 'randseed=',
'multiprocess=', 'coverage', 'slaveargs=', 'forever', 'debug',
- 'start=', 'nowindows', 'header', 'failfast', 'match'])
+ 'start=', 'nowindows', 'header', 'testdir=', 'timeout=', 'wait',
+ 'failfast', 'match'])
except getopt.error as msg:
usage(msg)
@@ -289,6 +317,7 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
use_resources = []
debug = False
start = None
+ timeout = None
for o, a in opts:
if o in ('-h', '--help'):
print(__doc__)
@@ -332,7 +361,9 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
elif o in ('-T', '--coverage'):
trace = True
elif o in ('-D', '--coverdir'):
- coverdir = os.path.join(os.getcwd(), a)
+ # CWD is replaced with a temporary dir before calling main(), so we
+ # need to join it with the saved CWD so it goes where the user expects.
+ coverdir = os.path.join(support.SAVEDCWD, a)
elif o in ('-N', '--nocoverdir'):
coverdir = None
elif o in ('-R', '--huntrleaks'):
@@ -350,9 +381,9 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
huntrleaks[1] = int(huntrleaks[1])
if len(huntrleaks) == 2 or not huntrleaks[2]:
huntrleaks[2:] = ["reflog.txt"]
- # Avoid false positives due to the character cache in
- # stringobject.c filling slowly with random data
- warm_char_cache()
+ # Avoid false positives due to various caches
+ # filling slowly with random data:
+ warm_caches()
elif o in ('-M', '--memlimit'):
support.set_memlimit(a)
elif o in ('-u', '--use'):
@@ -361,6 +392,9 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
if r == 'all':
use_resources[:] = RESOURCE_NAMES
continue
+ if r == 'none':
+ del use_resources[:]
+ continue
remove = False
if r[0] == '-':
remove = True
@@ -391,18 +425,45 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
forever = True
elif o in ('-j', '--multiprocess'):
use_mp = int(a)
+ if use_mp <= 0:
+ try:
+ import multiprocessing
+ # Use all cores + extras for tests that like to sleep
+ use_mp = 2 + multiprocessing.cpu_count()
+ except (ImportError, NotImplementedError):
+ use_mp = 3
+ if use_mp == 1:
+ use_mp = None
elif o == '--header':
header = True
elif o == '--slaveargs':
args, kwargs = json.loads(a)
try:
result = runtest(*args, **kwargs)
+ except KeyboardInterrupt:
+ result = INTERRUPTED, ''
except BaseException as e:
- result = INTERRUPTED, e.__class__.__name__
+ traceback.print_exc()
+ result = CHILD_ERROR, str(e)
sys.stdout.flush()
print() # Force a newline (just in case)
print(json.dumps(result))
sys.exit(0)
+ elif o == '--testdir':
+ # CWD is replaced with a temporary dir before calling main(), so we
+ # join it with the saved CWD so it ends up where the user expects.
+ testdir = os.path.join(support.SAVEDCWD, a)
+ elif o == '--timeout':
+ if hasattr(faulthandler, 'dump_tracebacks_later'):
+ timeout = float(a)
+ if timeout <= 0:
+ timeout = None
+ else:
+ print("Warning: The timeout option requires "
+ "faulthandler.dump_tracebacks_later")
+ timeout = None
+ elif o == '--wait':
+ input("Press any key to continue...")
else:
print(("No handler for option {}. Please report this as a bug "
"at http://bugs.python.org.").format(o), file=sys.stderr)
@@ -481,7 +542,13 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
print("== ", os.getcwd())
print("Testing with flags:", sys.flags)
- alltests = findtests(testdir, stdtests, nottests)
+ # if testdir is set, then we are not running the Python test suite, so
+ # don't add default tests to be executed or skipped (pass empty values)
+ if testdir:
+ alltests = findtests(testdir, list(), set())
+ else:
+ alltests = findtests(testdir, stdtests, nottests)
+
selected = tests or args or alltests
if single:
selected = selected[:1]
@@ -501,7 +568,7 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
random.shuffle(selected)
if trace:
import trace, tempfile
- tracer = trace.Trace(ignoredirs=[sys.prefix, sys.exec_prefix,
+ tracer = trace.Trace(ignoredirs=[sys.base_prefix, sys.base_exec_prefix,
tempfile.gettempdir()],
trace=False, count=True)
@@ -566,7 +633,8 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
(test, verbose, quiet),
dict(huntrleaks=huntrleaks, use_resources=use_resources,
debug=debug, output_on_failure=verbose3,
- failfast=failfast, match_tests=match_tests)
+ timeout=timeout, failfast=failfast,
+ match_tests=match_tests)
)
# -E is needed by some tests, e.g. test_import
# Running the child from the same working directory ensures
@@ -578,10 +646,15 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
close_fds=(os.name != 'nt'),
cwd=support.SAVEDCWD)
stdout, stderr = popen.communicate()
+ retcode = popen.wait()
# Strip last refcount output line if it exists, since it
# comes from the shutdown of the interpreter in the subcommand.
stderr = debug_output_pat.sub("", stderr)
stdout, _, result = stdout.strip().rpartition("\n")
+ if retcode != 0:
+ result = (CHILD_ERROR, "Exit code %s" % retcode)
+ output.put((test, stdout.rstrip(), stderr.rstrip(), result))
+ return
if not result:
output.put((None, None, None, None))
return
@@ -614,8 +687,9 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
sys.stdout.flush()
sys.stderr.flush()
if result[0] == INTERRUPTED:
- assert result[1] == 'KeyboardInterrupt'
- raise KeyboardInterrupt # What else?
+ raise KeyboardInterrupt
+ if result[0] == CHILD_ERROR:
+ raise Exception("Child error on {}: {}".format(test, result[1]))
test_index += 1
except KeyboardInterrupt:
interrupted = True
@@ -632,13 +706,14 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
if trace:
# If we're tracing code coverage, then we don't exit with status
# if on a false return value from main.
- tracer.runctx('runtest(test, verbose, quiet)',
+ tracer.runctx('runtest(test, verbose, quiet, timeout=timeout)',
globals=globals(), locals=vars())
else:
try:
result = runtest(test, verbose, quiet, huntrleaks, debug,
output_on_failure=verbose3,
- failfast=failfast, match_tests=match_tests)
+ timeout=timeout, failfast=failfast,
+ match_tests=match_tests)
accumulate_result(test, result)
except KeyboardInterrupt:
interrupted = True
@@ -709,7 +784,7 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
sys.stdout.flush()
try:
verbose = True
- ok = runtest(test, True, quiet, huntrleaks, debug)
+ ok = runtest(test, True, quiet, huntrleaks, debug, timeout=timeout)
except KeyboardInterrupt:
# print a newline separate from the ^C
print()
@@ -734,6 +809,8 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
sys.exit(len(bad) > 0 or interrupted)
+# small set of tests to determine if we have a basically functioning interpreter
+# (i.e. if any of these fail, then anything else is likely to follow)
STDTESTS = [
'test_grammar',
'test_opcodes',
@@ -744,12 +821,11 @@ STDTESTS = [
'test_unittest',
'test_doctest',
'test_doctest2',
+ 'test_support'
]
-NOTTESTS = {
- 'test_future1',
- 'test_future2',
-}
+# set of tests that we don't want to be executed when using regrtest
+NOTTESTS = set()
def findtests(testdir=None, stdtests=STDTESTS, nottests=NOTTESTS):
"""Return a list of all applicable test modules."""
@@ -758,9 +834,9 @@ def findtests(testdir=None, stdtests=STDTESTS, nottests=NOTTESTS):
tests = []
others = set(stdtests) | nottests
for name in names:
- modname, ext = os.path.splitext(name)
- if modname[:5] == "test_" and ext == ".py" and modname not in others:
- tests.append(modname)
+ mod, ext = os.path.splitext(name)
+ if mod[:5] == "test_" and ext in (".py", "") and mod not in others:
+ tests.append(mod)
return stdtests + sorted(tests)
# We do not use a generator so multiple threads can call next().
@@ -785,17 +861,14 @@ class MultiprocessTests(object):
def replace_stdout():
"""Set stdout encoder error handler to backslashreplace (as stderr error
handler) to avoid UnicodeEncodeError when printing a traceback"""
- if os.name == "nt":
- # Replace sys.stdout breaks the stdout newlines on Windows: issue #8533
- return
-
import atexit
stdout = sys.stdout
sys.stdout = open(stdout.fileno(), 'w',
encoding=stdout.encoding,
errors="backslashreplace",
- closefd=False)
+ closefd=False,
+ newline='\n')
def restore_stdout():
sys.stdout.close()
@@ -804,7 +877,8 @@ def replace_stdout():
def runtest(test, verbose, quiet,
huntrleaks=False, debug=False, use_resources=None,
- output_on_failure=False, failfast=False, match_tests=None):
+ output_on_failure=False, failfast=False, match_tests=None,
+ timeout=None):
"""Run a single test.
test -- the name of the test
@@ -814,6 +888,8 @@ def runtest(test, verbose, quiet,
huntrleaks -- run multiple times to test for leaks; requires a debug
build; a triple corresponding to -R's three arguments
output_on_failure -- if true, display test output on failure
+ timeout -- dump the traceback and exit if a test takes more than
+ timeout seconds
Returns one of the test result constants:
INTERRUPTED KeyboardInterrupt when run under -j
@@ -826,6 +902,9 @@ def runtest(test, verbose, quiet,
if use_resources is not None:
support.use_resources = use_resources
+ use_timeout = (timeout is not None)
+ if use_timeout:
+ faulthandler.dump_tracebacks_later(timeout, exit=True)
try:
support.match_tests = match_tests
if failfast:
@@ -864,6 +943,8 @@ def runtest(test, verbose, quiet,
display_failure=not verbose)
return result
finally:
+ if use_timeout:
+ faulthandler.cancel_dump_tracebacks_later()
cleanup_test_droppings(test, verbose)
runtest.stringio = None
@@ -909,10 +990,10 @@ class saved_test_environment:
resources = ('sys.argv', 'cwd', 'sys.stdin', 'sys.stdout', 'sys.stderr',
'os.environ', 'sys.path', 'sys.path_hooks', '__import__',
'warnings.filters', 'asyncore.socket_map',
- 'logging._handlers', 'logging._handlerList',
- 'shutil.archive_formats', 'shutil.unpack_formats',
+ 'logging._handlers', 'logging._handlerList', 'sys.gettrace',
'sys.warnoptions', 'threading._dangling',
'multiprocessing.process._dangling',
+ 'sysconfig._CONFIG_VARS', 'sysconfig._INSTALL_SCHEMES',
'support.TESTFN',
)
@@ -961,6 +1042,11 @@ class saved_test_environment:
sys.path_hooks = saved_hooks[1]
sys.path_hooks[:] = saved_hooks[2]
+ def get_sys_gettrace(self):
+ return sys.gettrace()
+ def restore_sys_gettrace(self, trace_fxn):
+ sys.settrace(trace_fxn)
+
def get___import__(self):
return builtins.__import__
def restore___import__(self, import_):
@@ -1044,6 +1130,24 @@ class saved_test_environment:
multiprocessing.process._dangling.clear()
multiprocessing.process._dangling.update(saved)
+ def get_sysconfig__CONFIG_VARS(self):
+ # make sure the dict is initialized
+ sysconfig.get_config_var('prefix')
+ return (id(sysconfig._CONFIG_VARS), sysconfig._CONFIG_VARS,
+ dict(sysconfig._CONFIG_VARS))
+ def restore_sysconfig__CONFIG_VARS(self, saved):
+ sysconfig._CONFIG_VARS = saved[1]
+ sysconfig._CONFIG_VARS.clear()
+ sysconfig._CONFIG_VARS.update(saved[2])
+
+ def get_sysconfig__INSTALL_SCHEMES(self):
+ return (id(sysconfig._INSTALL_SCHEMES), sysconfig._INSTALL_SCHEMES,
+ sysconfig._INSTALL_SCHEMES.copy())
+ def restore_sysconfig__INSTALL_SCHEMES(self, saved):
+ sysconfig._INSTALL_SCHEMES = saved[1]
+ sysconfig._INSTALL_SCHEMES.clear()
+ sysconfig._INSTALL_SCHEMES.update(saved[2])
+
def get_support_TESTFN(self):
if os.path.isfile(support.TESTFN):
result = 'f'
@@ -1108,14 +1212,15 @@ def runtest_inner(test, verbose, quiet,
start_time = time.time()
the_package = __import__(abstest, globals(), locals(), [])
the_module = getattr(the_package, test)
- # Old tests run to completion simply as a side-effect of
- # being imported. For tests based on unittest or doctest,
- # explicitly invoke their test_main() function (if it exists).
- indirect_test = getattr(the_module, "test_main", None)
- if indirect_test is not None:
- indirect_test()
+ # If the test has a test_main, that will run the appropriate
+ # tests. If not, use normal unittest test loading.
+ test_runner = getattr(the_module, "test_main", None)
+ if test_runner is None:
+ tests = unittest.TestLoader().loadTestsFromModule(the_module)
+ test_runner = lambda: support.run_unittest(tests)
+ test_runner()
if huntrleaks:
- refleak = dash_R(the_module, test, indirect_test,
+ refleak = dash_R(the_module, test, test_runner,
huntrleaks)
test_time = time.time() - start_time
except support.ResourceDenied as msg:
@@ -1198,7 +1303,8 @@ def dash_R(the_module, test, indirect_test, huntrleaks):
False if the test didn't leak references; True if we detected refleaks.
"""
# This code is hackish and inelegant, but it seems to do the job.
- import copyreg, _abcoll
+ import copyreg
+ import collections.abc
if not hasattr(sys, 'gettotalrefcount'):
raise Exception("Tracking reference leaks requires a debug build "
@@ -1215,7 +1321,7 @@ def dash_R(the_module, test, indirect_test, huntrleaks):
else:
zdc = zipimport._zip_directory_cache.copy()
abcs = {}
- for abc in [getattr(_abcoll, a) for a in _abcoll.__all__]:
+ for abc in [getattr(collections.abc, a) for a in collections.abc.__all__]:
if not isabstract(abc):
continue
for obj in abc.__subclasses__() + [abc]:
@@ -1261,7 +1367,7 @@ def dash_R_cleanup(fs, ps, pic, zdc, abcs):
import gc, copyreg
import _strptime, linecache
import urllib.parse, urllib.request, mimetypes, doctest
- import struct, filecmp, _abcoll
+ import struct, filecmp, collections.abc
from distutils.dir_util import _path_created
from weakref import WeakSet
@@ -1288,7 +1394,7 @@ def dash_R_cleanup(fs, ps, pic, zdc, abcs):
sys._clear_type_cache()
# Clear ABC registries, restoring previously saved ABC registries.
- for abc in [getattr(_abcoll, a) for a in _abcoll.__all__]:
+ for abc in [getattr(collections.abc, a) for a in collections.abc.__all__]:
if not isabstract(abc):
continue
for obj in abc.__subclasses__() + [abc]:
@@ -1324,10 +1430,15 @@ def dash_R_cleanup(fs, ps, pic, zdc, abcs):
# Collect cyclic trash.
gc.collect()
-def warm_char_cache():
+def warm_caches():
+ # char cache
s = bytes(range(256))
for i in range(256):
s[i:i+1]
+ # unicode cache
+ x = [chr(i) for i in range(256)]
+ # int cache
+ x = list(range(-5, 257))
def findtestdir(path=None):
return path or os.path.dirname(__file__) or os.curdir
@@ -1374,13 +1485,14 @@ def printlist(x, width=70, indent=4):
# Tests that are expected to be skipped everywhere except on one platform
# are also handled separately.
-_expectations = {
- 'win32':
+_expectations = (
+ ('win32',
"""
test__locale
test_crypt
test_curses
test_dbm
+ test_devpoll
test_fcntl
test_fork1
test_epoll
@@ -1403,15 +1515,16 @@ _expectations = {
test_threadsignals
test_wait3
test_wait4
- """,
- 'linux2':
+ """),
+ ('linux',
"""
test_curses
+ test_devpoll
test_largefile
test_kqueue
test_ossaudiodev
- """,
- 'unixware7':
+ """),
+ ('unixware',
"""
test_epoll
test_largefile
@@ -1421,8 +1534,8 @@ _expectations = {
test_pyexpat
test_sax
test_sundry
- """,
- 'openunix8':
+ """),
+ ('openunix',
"""
test_epoll
test_largefile
@@ -1432,8 +1545,8 @@ _expectations = {
test_pyexpat
test_sax
test_sundry
- """,
- 'sco_sv3':
+ """),
+ ('sco_sv',
"""
test_asynchat
test_fork1
@@ -1452,11 +1565,12 @@ _expectations = {
test_threaded_import
test_threadedtempfile
test_threading
- """,
- 'darwin':
+ """),
+ ('darwin',
"""
test__locale
test_curses
+ test_devpoll
test_epoll
test_dbm_gnu
test_gdb
@@ -1465,8 +1579,8 @@ _expectations = {
test_minidom
test_ossaudiodev
test_poll
- """,
- 'sunos5':
+ """),
+ ('sunos',
"""
test_curses
test_dbm
@@ -1477,8 +1591,8 @@ _expectations = {
test_openpty
test_zipfile
test_zlib
- """,
- 'hp-ux11':
+ """),
+ ('hp-ux',
"""
test_curses
test_epoll
@@ -1493,11 +1607,12 @@ _expectations = {
test_sax
test_zipfile
test_zlib
- """,
- 'cygwin':
+ """),
+ ('cygwin',
"""
test_curses
test_dbm
+ test_devpoll
test_epoll
test_ioctl
test_kqueue
@@ -1505,8 +1620,8 @@ _expectations = {
test_locale
test_ossaudiodev
test_socketserver
- """,
- 'os2emx':
+ """),
+ ('os2emx',
"""
test_audioop
test_curses
@@ -1519,9 +1634,10 @@ _expectations = {
test_pty
test_resource
test_signal
- """,
- 'freebsd4':
+ """),
+ ('freebsd',
"""
+ test_devpoll
test_epoll
test_dbm_gnu
test_locale
@@ -1536,8 +1652,8 @@ _expectations = {
test_timeout
test_urllibnet
test_multiprocessing
- """,
- 'aix5':
+ """),
+ ('aix',
"""
test_bz2
test_epoll
@@ -1551,10 +1667,11 @@ _expectations = {
test_ttk_textonly
test_zipimport
test_zlib
- """,
- 'openbsd3':
+ """),
+ ('openbsd',
"""
test_ctypes
+ test_devpoll
test_epoll
test_dbm_gnu
test_locale
@@ -1566,11 +1683,12 @@ _expectations = {
test_ttk_guionly
test_ttk_textonly
test_multiprocessing
- """,
- 'netbsd3':
+ """),
+ ('netbsd',
"""
test_ctypes
test_curses
+ test_devpoll
test_epoll
test_dbm_gnu
test_locale
@@ -1581,12 +1699,8 @@ _expectations = {
test_ttk_guionly
test_ttk_textonly
test_multiprocessing
- """,
-}
-_expectations['freebsd5'] = _expectations['freebsd4']
-_expectations['freebsd6'] = _expectations['freebsd4']
-_expectations['freebsd7'] = _expectations['freebsd4']
-_expectations['freebsd8'] = _expectations['freebsd4']
+ """),
+)
class _ExpectedSkips:
def __init__(self):
@@ -1594,9 +1708,13 @@ class _ExpectedSkips:
from test import test_timeout
self.valid = False
- if sys.platform in _expectations:
- s = _expectations[sys.platform]
- self.expected = set(s.split())
+ expected = None
+ for item in _expectations:
+ if sys.platform.startswith(item[0]):
+ expected = item[1]
+ break
+ if expected is not None:
+ self.expected = set(expected.split())
# These are broken tests, for now skipped on every platform.
# XXX Fix these!
@@ -1656,9 +1774,8 @@ def _make_temp_dir_for_build(TEMPDIR):
TEMPDIR = os.path.abspath(TEMPDIR)
try:
os.mkdir(TEMPDIR)
- except OSError as e:
- if e.errno != errno.EEXIST:
- raise
+ except FileExistsError:
+ pass
# Define a writable temp dir that will be used as cwd while running
# the tests. The name of the dir includes the pid to allow parallel
diff --git a/Lib/test/reperf.py b/Lib/test/reperf.py
index 7c68234..e93bacd 100644
--- a/Lib/test/reperf.py
+++ b/Lib/test/reperf.py
@@ -9,13 +9,13 @@ def main():
timefunc(10, p.findall, s)
def timefunc(n, func, *args, **kw):
- t0 = time.clock()
+ t0 = time.perf_counter()
try:
for i in range(n):
result = func(*args, **kw)
return result
finally:
- t1 = time.clock()
+ t1 = time.perf_counter()
if n > 1:
print(n, "times", end=' ')
print(func.__name__, "%.3f" % (t1-t0), "CPU seconds")
diff --git a/Lib/test/script_helper.py b/Lib/test/script_helper.py
index ba446cd..b09f4bf 100644
--- a/Lib/test/script_helper.py
+++ b/Lib/test/script_helper.py
@@ -1,6 +1,7 @@
# Common utility functions used by various script execution tests
# e.g. test_cmd_line, test_cmd_line_script and test_runpy
+import importlib
import sys
import os
import os.path
@@ -59,11 +60,12 @@ def assert_python_failure(*args, **env_vars):
"""
return _assert_python(False, *args, **env_vars)
-def spawn_python(*args):
+def spawn_python(*args, **kw):
cmd_line = [sys.executable, '-E']
cmd_line.extend(args)
return subprocess.Popen(cmd_line, stdin=subprocess.PIPE,
- stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+ **kw)
def kill_python(p):
p.stdin.close()
@@ -92,6 +94,7 @@ def make_script(script_dir, script_basename, source):
script_file = open(script_name, 'w', encoding='utf-8')
script_file.write(source)
script_file.close()
+ importlib.invalidate_caches()
return script_name
def make_zip_script(zip_dir, zip_basename, script_name, name_in_zip=None):
diff --git a/Lib/test/seq_tests.py b/Lib/test/seq_tests.py
index f655c29..f185a82 100644
--- a/Lib/test/seq_tests.py
+++ b/Lib/test/seq_tests.py
@@ -4,6 +4,7 @@ Tests common to tuple, list and UserList.UserList
import unittest
import sys
+import pickle
# Various iterables
# This is used for checking the constructor (here and in test_deque.py)
@@ -388,3 +389,9 @@ class CommonTest(unittest.TestCase):
self.assertEqual(a.index(0, -4*sys.maxsize, 4*sys.maxsize), 2)
self.assertRaises(ValueError, a.index, 0, 4*sys.maxsize,-4*sys.maxsize)
self.assertRaises(ValueError, a.index, 2, 0, -10)
+
+ def test_pickle(self):
+ lst = self.type2test([4, 5, 6, 7])
+ lst2 = pickle.loads(pickle.dumps(lst))
+ self.assertEqual(lst2, lst)
+ self.assertNotEqual(id(lst2), id(lst))
diff --git a/Lib/test/sortperf.py b/Lib/test/sortperf.py
index 0ce88de..af7c0b4 100644
--- a/Lib/test/sortperf.py
+++ b/Lib/test/sortperf.py
@@ -57,9 +57,9 @@ def flush():
sys.stdout.flush()
def doit(L):
- t0 = time.clock()
+ t0 = time.perf_counter()
L.sort()
- t1 = time.clock()
+ t1 = time.perf_counter()
print("%6.2f" % (t1-t0), end=' ')
flush()
diff --git a/Lib/test/ssl_key.passwd.pem b/Lib/test/ssl_key.passwd.pem
new file mode 100644
index 0000000..2524672
--- /dev/null
+++ b/Lib/test/ssl_key.passwd.pem
@@ -0,0 +1,18 @@
+-----BEGIN RSA PRIVATE KEY-----
+Proc-Type: 4,ENCRYPTED
+DEK-Info: DES-EDE3-CBC,1A8D9D2A02EC698A
+
+kJYbfZ8L0sfe9Oty3gw0aloNnY5E8fegRfQLZlNoxTl6jNt0nIwI8kDJ36CZgR9c
+u3FDJm/KqrfUoz8vW+qEnWhSG7QPX2wWGPHd4K94Yz/FgrRzZ0DoK7XxXq9gOtVA
+AVGQhnz32p+6WhfGsCr9ArXEwRZrTk/FvzEPaU5fHcoSkrNVAGX8IpSVkSDwEDQr
+Gv17+cfk99UV1OCza6yKHoFkTtrC+PZU71LomBabivS2Oc4B9hYuSR2hF01wTHP+
+YlWNagZOOVtNz4oKK9x9eNQpmfQXQvPPTfusexKIbKfZrMvJoxcm1gfcZ0H/wK6P
+6wmXSG35qMOOztCZNtperjs1wzEBXznyK8QmLcAJBjkfarABJX9vBEzZV0OUKhy+
+noORFwHTllphbmydLhu6ehLUZMHPhzAS5UN7srtpSN81eerDMy0RMUAwA7/PofX1
+94Me85Q8jP0PC9ETdsJcPqLzAPETEYu0ELewKRcrdyWi+tlLFrpE5KT/s5ecbl9l
+7B61U4Kfd1PIXc/siINhU3A3bYK+845YyUArUOnKf1kEox7p1RpD7yFqVT04lRTo
+cibNKATBusXSuBrp2G6GNuhWEOSafWCKJQAzgCYIp6ZTV2khhMUGppc/2H3CF6cO
+zX0KtlPVZC7hLkB6HT8SxYUwF1zqWY7+/XPPdc37MeEZ87Q3UuZwqORLY+Z0hpgt
+L5JXBCoklZhCAaN2GqwFLXtGiRSRFGY7xXIhbDTlE65Wv1WGGgDLMKGE1gOz3yAo
+2jjG1+yAHJUdE69XTFHSqSkvaloA1W03LdMXZ9VuQJ/ySXCie6ABAQ==
+-----END RSA PRIVATE KEY-----
diff --git a/Lib/test/ssl_servers.py b/Lib/test/ssl_servers.py
index 77c0542..8686153 100644
--- a/Lib/test/ssl_servers.py
+++ b/Lib/test/ssl_servers.py
@@ -94,7 +94,12 @@ class StatsRequestHandler(BaseHTTPRequestHandler):
"""Serve a GET request."""
sock = self.rfile.raw._sock
context = sock.context
- body = pprint.pformat(context.session_stats())
+ stats = {
+ 'session_cache': context.session_stats(),
+ 'cipher': sock.cipher(),
+ 'compression': sock.compression(),
+ }
+ body = pprint.pformat(stats)
body = body.encode('utf-8')
self.send_response(200)
self.send_header("Content-type", "text/plain; charset=utf-8")
@@ -172,6 +177,11 @@ if __name__ == "__main__":
action='store_false', help='be less verbose')
parser.add_argument('-s', '--stats', dest='use_stats_handler', default=False,
action='store_true', help='always return stats page')
+ parser.add_argument('--curve-name', dest='curve_name', type=str,
+ action='store',
+ help='curve name for EC-based Diffie-Hellman')
+ parser.add_argument('--dh', dest='dh_file', type=str, action='store',
+ help='PEM file containing DH parameters')
args = parser.parse_args()
support.verbose = args.verbose
@@ -182,6 +192,10 @@ if __name__ == "__main__":
handler_class.root = os.getcwd()
context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
context.load_cert_chain(CERTFILE)
+ if args.curve_name:
+ context.set_ecdh_curve(args.curve_name)
+ if args.dh_file:
+ context.load_dh_params(args.dh_file)
server = HTTPSServer(("", args.port), handler_class, context)
if args.verbose:
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index 413f9dd..e4688d0 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -28,6 +28,11 @@ class BaseTest(unittest.TestCase):
# Change in subclasses to change the behaviour of fixtesttype()
type2test = None
+ # Whether the "contained items" of the container are integers in
+ # range(0, 256) (i.e. bytes, bytearray) or strings of length 1
+ # (str)
+ contains_bytes = False
+
# All tests pass their arguments to the testing methods
# as str objects. fixtesttype() can be used to propagate
# these arguments to the appropriate type
@@ -47,11 +52,12 @@ class BaseTest(unittest.TestCase):
return obj
# check that obj.method(*args) returns result
- def checkequal(self, result, obj, methodname, *args):
+ def checkequal(self, result, obj, methodname, *args, **kwargs):
result = self.fixtype(result)
obj = self.fixtype(obj)
args = self.fixtype(args)
- realresult = getattr(obj, methodname)(*args)
+ kwargs = {k: self.fixtype(v) for k,v in kwargs.items()}
+ realresult = getattr(obj, methodname)(*args, **kwargs)
self.assertEqual(
result,
realresult
@@ -116,7 +122,11 @@ class BaseTest(unittest.TestCase):
self.checkequal(0, '', 'count', 'xx', sys.maxsize, 0)
self.checkraises(TypeError, 'hello', 'count')
- self.checkraises(TypeError, 'hello', 'count', 42)
+
+ if self.contains_bytes:
+ self.checkequal(0, 'hello', 'count', 42)
+ else:
+ self.checkraises(TypeError, 'hello', 'count', 42)
# For a variety of combinations,
# verify that str.count() matches an equivalent function
@@ -162,7 +172,11 @@ class BaseTest(unittest.TestCase):
self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a', None, 6)
self.checkraises(TypeError, 'hello', 'find')
- self.checkraises(TypeError, 'hello', 'find', 42)
+
+ if self.contains_bytes:
+ self.checkequal(-1, 'hello', 'find', 42)
+ else:
+ self.checkraises(TypeError, 'hello', 'find', 42)
self.checkequal(0, '', 'find', '')
self.checkequal(-1, '', 'find', '', 1, 1)
@@ -216,7 +230,11 @@ class BaseTest(unittest.TestCase):
self.checkequal( 2, 'rrarrrrrrrrra', 'rfind', 'a', None, 6)
self.checkraises(TypeError, 'hello', 'rfind')
- self.checkraises(TypeError, 'hello', 'rfind', 42)
+
+ if self.contains_bytes:
+ self.checkequal(-1, 'hello', 'rfind', 42)
+ else:
+ self.checkraises(TypeError, 'hello', 'rfind', 42)
# For a variety of combinations,
# verify that str.rfind() matches __contains__
@@ -244,6 +262,9 @@ class BaseTest(unittest.TestCase):
# issue 7458
self.checkequal(-1, 'ab', 'rfind', 'xxx', sys.maxsize + 1, 0)
+ # issue #15534
+ self.checkequal(0, '<......\u043c...', "rfind", "<")
+
def test_index(self):
self.checkequal(0, 'abcdefghiabc', 'index', '')
self.checkequal(3, 'abcdefghiabc', 'index', 'def')
@@ -263,7 +284,11 @@ class BaseTest(unittest.TestCase):
self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a', None, 6)
self.checkraises(TypeError, 'hello', 'index')
- self.checkraises(TypeError, 'hello', 'index', 42)
+
+ if self.contains_bytes:
+ self.checkraises(ValueError, 'hello', 'index', 42)
+ else:
+ self.checkraises(TypeError, 'hello', 'index', 42)
def test_rindex(self):
self.checkequal(12, 'abcdefghiabc', 'rindex', '')
@@ -285,7 +310,11 @@ class BaseTest(unittest.TestCase):
self.checkequal( 2, 'rrarrrrrrrrra', 'rindex', 'a', None, 6)
self.checkraises(TypeError, 'hello', 'rindex')
- self.checkraises(TypeError, 'hello', 'rindex', 42)
+
+ if self.contains_bytes:
+ self.checkraises(ValueError, 'hello', 'rindex', 42)
+ else:
+ self.checkraises(TypeError, 'hello', 'rindex', 42)
def test_lower(self):
self.checkequal('hello', 'HeLLo', 'lower')
@@ -363,6 +392,17 @@ class BaseTest(unittest.TestCase):
self.checkequal(['a']*18 + ['aBLAHa'], ('aBLAH'*20)[:-4],
'split', 'BLAH', 18)
+ # with keyword args
+ self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', sep='|')
+ self.checkequal(['a', 'b|c|d'],
+ 'a|b|c|d', 'split', '|', maxsplit=1)
+ self.checkequal(['a', 'b|c|d'],
+ 'a|b|c|d', 'split', sep='|', maxsplit=1)
+ self.checkequal(['a', 'b|c|d'],
+ 'a|b|c|d', 'split', maxsplit=1, sep='|')
+ self.checkequal(['a', 'b c d'],
+ 'a b c d', 'split', maxsplit=1)
+
# argument type
self.checkraises(TypeError, 'hello', 'split', 42, 42, 42)
@@ -420,6 +460,17 @@ class BaseTest(unittest.TestCase):
self.checkequal(['aBLAHa'] + ['a']*18, ('aBLAH'*20)[:-4],
'rsplit', 'BLAH', 18)
+ # with keyword args
+ self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', sep='|')
+ self.checkequal(['a|b|c', 'd'],
+ 'a|b|c|d', 'rsplit', '|', maxsplit=1)
+ self.checkequal(['a|b|c', 'd'],
+ 'a|b|c|d', 'rsplit', sep='|', maxsplit=1)
+ self.checkequal(['a|b|c', 'd'],
+ 'a|b|c|d', 'rsplit', maxsplit=1, sep='|')
+ self.checkequal(['a b c', 'd'],
+ 'a b c d', 'rsplit', maxsplit=1)
+
# argument type
self.checkraises(TypeError, 'hello', 'rsplit', 42, 42, 42)
@@ -549,6 +600,8 @@ class BaseTest(unittest.TestCase):
EQ("ReyKKjavik", "Reykjavik", "replace", "k", "KK", 1)
EQ("Reykjavik", "Reykjavik", "replace", "k", "KK", 0)
EQ("A----B----C----", "A.B.C.", "replace", ".", "----")
+ # issue #15534
+ EQ('...\u043c......&lt;', '...\u043c......<', "replace", "<", "&lt;")
EQ("Reykjavik", "Reykjavik", "replace", "q", "KK")
@@ -643,7 +696,7 @@ class CommonTest(BaseTest):
# check that titlecased chars are lowered correctly
# \u1ffc is the titlecased char
- self.checkequal('\u1ffc\u1ff3\u1ff3\u1ff3',
+ self.checkequal('\u03a9\u0399\u1ff3\u1ff3\u1ff3',
'\u1ff3\u1ff3\u1ffc\u1ffc', 'capitalize')
# check with cased non-letter chars
self.checkequal('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd',
@@ -908,7 +961,14 @@ class MixinStrUnicodeUserStringTest:
self.checkequal(['abc', 'def', 'ghi'], "abc\ndef\r\nghi\n", 'splitlines')
self.checkequal(['abc', 'def', 'ghi', ''], "abc\ndef\r\nghi\n\r", 'splitlines')
self.checkequal(['', 'abc', 'def', 'ghi', ''], "\nabc\ndef\r\nghi\n\r", 'splitlines')
- self.checkequal(['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'], "\nabc\ndef\r\nghi\n\r", 'splitlines', 1)
+ self.checkequal(['', 'abc', 'def', 'ghi', ''],
+ "\nabc\ndef\r\nghi\n\r", 'splitlines', False)
+ self.checkequal(['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'],
+ "\nabc\ndef\r\nghi\n\r", 'splitlines', True)
+ self.checkequal(['', 'abc', 'def', 'ghi', ''], "\nabc\ndef\r\nghi\n\r",
+ 'splitlines', keepends=False)
+ self.checkequal(['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'],
+ "\nabc\ndef\r\nghi\n\r", 'splitlines', keepends=True)
self.checkraises(TypeError, 'abc', 'splitlines', 42, 42)
@@ -1142,6 +1202,10 @@ class MixinStrUnicodeUserStringTest:
self.checkraises(TypeError, '%10.*f', '__mod__', ('foo', 42.))
self.checkraises(ValueError, '%10', '__mod__', (42,))
+ # Outrageously large width or precision should raise ValueError.
+ self.checkraises(ValueError, '%%%df' % (2**64), '__mod__', (3.2))
+ self.checkraises(ValueError, '%%.%df' % (2**64), '__mod__', (3.2))
+
class X(object): pass
self.checkraises(TypeError, 'abc', '__mod__', X())
@@ -1260,6 +1324,9 @@ class MixinStrUnicodeUserStringTest:
self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith,
x, None, None, None)
+ # issue #15534
+ self.checkequal(10, "...\u043c......<", "find", "<")
+
class MixinStrUnicodeTest:
# Additional tests that only work with str and unicode.
diff --git a/Lib/test/support.py b/Lib/test/support.py
index e1ec9e2..c5640e0 100644
--- a/Lib/test/support.py
+++ b/Lib/test/support.py
@@ -15,7 +15,7 @@ import shutil
import warnings
import unittest
import importlib
-import collections
+import collections.abc
import re
import subprocess
import imp
@@ -37,26 +37,41 @@ try:
except ImportError:
multiprocessing = None
+try:
+ import zlib
+except ImportError:
+ zlib = None
+
+try:
+ import bz2
+except ImportError:
+ bz2 = None
+
+try:
+ import lzma
+except ImportError:
+ lzma = None
__all__ = [
- "Error", "TestFailed", "ResourceDenied", "import_module",
- "verbose", "use_resources", "max_memuse", "record_original_stdout",
+ "Error", "TestFailed", "ResourceDenied", "import_module", "verbose",
+ "use_resources", "max_memuse", "record_original_stdout",
"get_original_stdout", "unload", "unlink", "rmtree", "forget",
- "is_resource_enabled", "requires", "requires_mac_ver",
- "find_unused_port", "bind_port",
- "fcmp", "is_jython", "TESTFN", "HOST", "FUZZ", "SAVEDCWD", "temp_cwd",
- "findfile", "sortdict", "check_syntax_error", "open_urlresource",
- "check_warnings", "CleanImport", "EnvironmentVarGuard",
- "TransientResource", "captured_output", "captured_stdout",
- "captured_stdin", "captured_stderr",
- "time_out", "socket_peer_reset", "ioerror_peer_reset",
- "run_with_locale", 'temp_umask', "transient_internet",
- "set_memlimit", "bigmemtest", "bigaddrspacetest", "BasicTestRunner",
- "run_unittest", "run_doctest", "threading_setup", "threading_cleanup",
- "reap_children", "cpython_only", "check_impl_detail", "get_attribute",
- "swap_item", "swap_attr", "requires_IEEE_754",
+ "is_resource_enabled", "requires", "requires_freebsd_version",
+ "requires_linux_version", "requires_mac_ver", "find_unused_port",
+ "bind_port", "IPV6_ENABLED", "is_jython", "TESTFN", "HOST", "SAVEDCWD",
+ "temp_cwd", "findfile", "create_empty_file", "sortdict",
+ "check_syntax_error", "open_urlresource", "check_warnings", "CleanImport",
+ "EnvironmentVarGuard", "TransientResource", "captured_stdout",
+ "captured_stdin", "captured_stderr", "time_out", "socket_peer_reset",
+ "ioerror_peer_reset", "run_with_locale", 'temp_umask',
+ "transient_internet", "set_memlimit", "bigmemtest", "bigaddrspacetest",
+ "BasicTestRunner", "run_unittest", "run_doctest", "threading_setup",
+ "threading_cleanup", "reap_children", "cpython_only", "check_impl_detail",
+ "get_attribute", "swap_item", "swap_attr", "requires_IEEE_754",
"TestHandler", "Matcher", "can_symlink", "skip_unless_symlink",
- "import_fresh_module", "failfast", "run_with_tz"
+ "skip_unless_xattr", "import_fresh_module", "requires_zlib",
+ "PIPE_MAX_SIZE", "failfast", "anticipate_failure", "run_with_tz",
+ "requires_bz2", "requires_lzma"
]
class Error(Exception):
@@ -127,6 +142,17 @@ def _save_and_block_module(name, orig_modules):
return saved
+def anticipate_failure(condition):
+ """Decorator to mark a test that is known to be broken in some cases
+
+ Any use of this decorator should have a comment identifying the
+ associated tracker issue.
+ """
+ if condition:
+ return unittest.expectedFailure
+ return lambda f: f
+
+
def import_fresh_module(name, fresh=(), blocked=(), deprecated=False):
"""Imports and returns a module, deliberately bypassing the sys.modules cache
and importing a fresh copy of the module. Once the import is complete,
@@ -170,8 +196,7 @@ def get_attribute(obj, name):
try:
attribute = getattr(obj, name)
except AttributeError:
- raise unittest.SkipTest("module %s has no attribute %s" % (
- obj.__name__, name))
+ raise unittest.SkipTest("object %r has no attribute %r" % (obj, name))
else:
return attribute
@@ -276,8 +301,7 @@ def rmtree(path):
try:
_rmtree(path)
except OSError as error:
- # Unix returns ENOENT, Windows returns ESRCH.
- if error.errno not in (errno.ENOENT, errno.ESRCH):
+ if error.errno != errno.ENOENT:
raise
def make_legacy_pyc(source):
@@ -362,9 +386,52 @@ def requires(resource, msg=None):
return
if not is_resource_enabled(resource):
if msg is None:
- msg = "Use of the `%s' resource not enabled" % resource
+ msg = "Use of the %r resource not enabled" % resource
raise ResourceDenied(msg)
+def _requires_unix_version(sysname, min_version):
+ """Decorator raising SkipTest if the OS is `sysname` and the version is less
+ than `min_version`.
+
+ For example, @_requires_unix_version('FreeBSD', (7, 2)) raises SkipTest if
+ the FreeBSD version is less than 7.2.
+ """
+ def decorator(func):
+ @functools.wraps(func)
+ def wrapper(*args, **kw):
+ if platform.system() == sysname:
+ version_txt = platform.release().split('-', 1)[0]
+ try:
+ version = tuple(map(int, version_txt.split('.')))
+ except ValueError:
+ pass
+ else:
+ if version < min_version:
+ min_version_txt = '.'.join(map(str, min_version))
+ raise unittest.SkipTest(
+ "%s version %s or higher required, not %s"
+ % (sysname, min_version_txt, version_txt))
+ return wrapper
+ return decorator
+
+def requires_freebsd_version(*min_version):
+ """Decorator raising SkipTest if the OS is FreeBSD and the FreeBSD version is
+ less than `min_version`.
+
+ For example, @requires_freebsd_version(7, 2) raises SkipTest if the FreeBSD
+ version is less than 7.2.
+ """
+ return _requires_unix_version('FreeBSD', min_version)
+
+def requires_linux_version(*min_version):
+ """Decorator raising SkipTest if the OS is Linux and the Linux version is
+ less than `min_version`.
+
+ For example, @requires_linux_version(2, 6, 32) raises SkipTest if the Linux
+ version is less than 2.6.32.
+ """
+ return _requires_unix_version('Linux', min_version)
+
def requires_mac_ver(*min_version):
"""Decorator raising SkipTest if the OS is Mac OS X and the OS X
version if less than min_version.
@@ -392,6 +459,7 @@ def requires_mac_ver(*min_version):
return wrapper
return decorator
+
HOST = 'localhost'
def find_unused_port(family=socket.AF_INET, socktype=socket.SOCK_STREAM):
@@ -487,29 +555,41 @@ def bind_port(sock, host=HOST):
port = sock.getsockname()[1]
return port
-FUZZ = 1e-6
-
-def fcmp(x, y): # fuzzy comparison function
- if isinstance(x, float) or isinstance(y, float):
+def _is_ipv6_enabled():
+ """Check whether IPv6 is enabled on this host."""
+ if socket.has_ipv6:
+ sock = None
try:
- fuzz = (abs(x) + abs(y)) * FUZZ
- if abs(x-y) <= fuzz:
- return 0
- except:
+ sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ sock.bind(('::1', 0))
+ return True
+ except (socket.error, socket.gaierror):
pass
- elif type(x) == type(y) and isinstance(x, (tuple, list)):
- for i in range(min(len(x), len(y))):
- outcome = fcmp(x[i], y[i])
- if outcome != 0:
- return outcome
- return (len(x) > len(y)) - (len(x) < len(y))
- return (x > y) - (x < y)
+ finally:
+ if sock:
+ sock.close()
+ return False
+
+IPV6_ENABLED = _is_ipv6_enabled()
+
+
+# A constant likely larger than the underlying OS pipe buffer size.
+# Windows limit seems to be around 512B, and most Unix kernels have a 64K pipe
+# buffer size: take 1M to be sure.
+PIPE_MAX_SIZE = 1024 * 1024
+
# decorator for skipping tests on non-IEEE 754 platforms
requires_IEEE_754 = unittest.skipUnless(
float.__getformat__("double").startswith("IEEE"),
"test requires IEEE 754 doubles")
+requires_zlib = unittest.skipUnless(zlib, 'requires zlib')
+
+requires_bz2 = unittest.skipUnless(bz2, 'requires bz2')
+
+requires_lzma = unittest.skipUnless(lzma, 'requires lzma')
+
is_jython = sys.platform.startswith('java')
# Filename used for testing
@@ -610,14 +690,15 @@ def temp_cwd(name='tempcwd', quiet=False, path=None):
rmtree(name)
-@contextlib.contextmanager
-def temp_umask(umask):
- """Context manager that temporarily sets the process umask."""
- oldmask = os.umask(umask)
- try:
- yield
- finally:
- os.umask(oldmask)
+if hasattr(os, "umask"):
+ @contextlib.contextmanager
+ def temp_umask(umask):
+ """Context manager that temporarily sets the process umask."""
+ oldmask = os.umask(umask)
+ try:
+ yield
+ finally:
+ os.umask(oldmask)
def findfile(file, here=__file__, subdir=None):
@@ -635,6 +716,11 @@ def findfile(file, here=__file__, subdir=None):
if os.path.exists(fn): return fn
return file
+def create_empty_file(filename):
+ """Create an empty file. If the file already exists, truncate it."""
+ fd = os.open(filename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
+ os.close(fd)
+
def sortdict(dict):
"Like repr(dict), but in sorted order."
items = sorted(dict.items())
@@ -699,7 +785,7 @@ def open_urlresource(url, *args, **kw):
f = check_valid_file(fn)
if f is not None:
return f
- raise TestFailed('invalid resource "%s"' % fn)
+ raise TestFailed('invalid resource %r' % fn)
class WarningsRecorder(object):
@@ -820,7 +906,7 @@ class CleanImport(object):
sys.modules.update(self.original_modules)
-class EnvironmentVarGuard(collections.MutableMapping):
+class EnvironmentVarGuard(collections.abc.MutableMapping):
"""Class to help protect the environment variable properly. Can be used as
a context manager."""
@@ -951,7 +1037,7 @@ def transient_internet(resource_name, *, timeout=30.0, errnos=()):
('WSANO_DATA', 11004),
]
- denied = ResourceDenied("Resource '%s' is not available" % resource_name)
+ denied = ResourceDenied("Resource %r is not available" % resource_name)
captured_errnos = errnos
gai_errnos = []
if not captured_errnos:
@@ -1040,6 +1126,16 @@ def gc_collect():
gc.collect()
gc.collect()
+@contextlib.contextmanager
+def disable_gc():
+ have_gc = gc.isenabled()
+ gc.disable()
+ try:
+ yield
+ finally:
+ if have_gc:
+ gc.enable()
+
def python_is_optimized():
"""Find if Python was built with optimizations."""
@@ -1048,19 +1144,21 @@ def python_is_optimized():
for opt in cflags.split():
if opt.startswith('-O'):
final_opt = opt
- return final_opt and final_opt != '-O0'
+ return final_opt != '' and final_opt != '-O0'
-_header = '2P'
+_header = 'nP'
+_align = '0n'
if hasattr(sys, "gettotalrefcount"):
_header = '2P' + _header
-_vheader = _header + 'P'
+ _align = '0P'
+_vheader = _header + 'n'
def calcobjsize(fmt):
- return struct.calcsize(_header + fmt + '0P')
+ return struct.calcsize(_header + fmt + _align)
def calcvobjsize(fmt):
- return struct.calcsize(_vheader + fmt + '0P')
+ return struct.calcsize(_vheader + fmt + _align)
_TPFLAGS_HAVE_GC = 1<<14
@@ -1134,7 +1232,7 @@ def run_with_tz(tz):
try:
return func(*args, **kwds)
finally:
- if orig_tz == None:
+ if orig_tz is None:
del os.environ['TZ']
else:
os.environ['TZ'] = orig_tz
@@ -1179,41 +1277,35 @@ def set_memlimit(limit):
raise ValueError('Memory limit %r too low to be useful' % (limit,))
max_memuse = memlimit
-def _memory_watchdog(start_evt, finish_evt, period=10.0):
- """A function which periodically watches the process' memory consumption
+class _MemoryWatchdog:
+ """An object which periodically watches the process' memory consumption
and prints it out.
"""
- # XXX: because of the GIL, and because the very long operations tested
- # in most bigmem tests are uninterruptible, the loop below gets woken up
- # much less often than expected.
- # The polling code should be rewritten in raw C, without holding the GIL,
- # and push results onto an anonymous pipe.
- try:
- page_size = os.sysconf('SC_PAGESIZE')
- except (ValueError, AttributeError):
+
+ def __init__(self):
+ self.procfile = '/proc/{pid}/statm'.format(pid=os.getpid())
+ self.started = False
+
+ def start(self):
try:
- page_size = os.sysconf('SC_PAGE_SIZE')
- except (ValueError, AttributeError):
- page_size = 4096
- procfile = '/proc/{pid}/statm'.format(pid=os.getpid())
- try:
- f = open(procfile, 'rb')
- except IOError as e:
- warnings.warn('/proc not available for stats: {}'.format(e),
- RuntimeWarning)
- sys.stderr.flush()
- return
- with f:
- start_evt.set()
- old_data = -1
- while not finish_evt.wait(period):
- f.seek(0)
- statm = f.read().decode('ascii')
- data = int(statm.split()[5])
- if data != old_data:
- old_data = data
- print(" ... process data size: {data:.1f}G"
- .format(data=data * page_size / (1024 ** 3)))
+ f = open(self.procfile, 'r')
+ except OSError as e:
+ warnings.warn('/proc not available for stats: {}'.format(e),
+ RuntimeWarning)
+ sys.stderr.flush()
+ return
+
+ watchdog_script = findfile("memory_watchdog.py")
+ self.mem_watchdog = subprocess.Popen([sys.executable, watchdog_script],
+ stdin=f, stderr=subprocess.DEVNULL)
+ f.close()
+ self.started = True
+
+ def stop(self):
+ if self.started:
+ self.mem_watchdog.terminate()
+ self.mem_watchdog.wait()
+
def bigmemtest(size, memuse, dry_run=True):
"""Decorator for bigmem tests.
@@ -1240,27 +1332,20 @@ def bigmemtest(size, memuse, dry_run=True):
"not enough memory: %.1fG minimum needed"
% (size * memuse / (1024 ** 3)))
- if real_max_memuse and verbose and threading:
+ if real_max_memuse and verbose:
print()
print(" ... expected peak memory use: {peak:.1f}G"
.format(peak=size * memuse / (1024 ** 3)))
- sys.stdout.flush()
- start_evt = threading.Event()
- finish_evt = threading.Event()
- t = threading.Thread(target=_memory_watchdog,
- args=(start_evt, finish_evt, 0.5))
- t.daemon = True
- t.start()
- start_evt.set()
+ watchdog = _MemoryWatchdog()
+ watchdog.start()
else:
- t = None
+ watchdog = None
try:
return f(self, maxsize)
finally:
- if t:
- finish_evt.set()
- t.join()
+ if watchdog:
+ watchdog.stop()
wrapper.size = size
wrapper.memuse = memuse
@@ -1342,6 +1427,33 @@ def check_impl_detail(**guards):
return guards.get(platform.python_implementation().lower(), default)
+def no_tracing(func):
+ """Decorator to temporarily turn off tracing for the duration of a test."""
+ if not hasattr(sys, 'gettrace'):
+ return func
+ else:
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ original_trace = sys.gettrace()
+ try:
+ sys.settrace(None)
+ return func(*args, **kwargs)
+ finally:
+ sys.settrace(original_trace)
+ return wrapper
+
+
+def refcount_test(test):
+ """Decorator for tests which involve reference counting.
+
+ To start, the decorator does not run the test if it is not run by CPython.
+ After that, any trace function is unset during the test to prevent
+ unexpected refcounts caused by the trace function.
+
+ """
+ return no_tracing(cpython_only(test))
+
+
def _filter_suite(suite, pred):
"""Recursively filter test cases in a suite based on a predicate."""
newtests = []
@@ -1354,7 +1466,6 @@ def _filter_suite(suite, pred):
newtests.append(test)
suite._tests = newtests
-
def _run_suite(suite):
"""Run tests from a unittest.TestSuite-derived class."""
if verbose:
@@ -1403,7 +1514,7 @@ def run_unittest(*classes):
#=======================================================================
# doctest driver.
-def run_doctest(module, verbosity=None):
+def run_doctest(module, verbosity=None, optionflags=0):
"""Run doctest on the given module. Return (#failures, #tests).
If optional argument verbosity is not specified (or is None), pass
@@ -1418,7 +1529,7 @@ def run_doctest(module, verbosity=None):
else:
verbosity = None
- f, t = doctest.testmod(module, verbose=verbosity)
+ f, t = doctest.testmod(module, verbose=verbosity, optionflags=optionflags)
if f:
raise TestFailed("%d of %d doctests failed" % (f, t))
if verbose:
@@ -1576,28 +1687,13 @@ def strip_python_stderr(stderr):
This will typically be run on the result of the communicate() method
of a subprocess.Popen object.
"""
- stderr = re.sub(br"\[\d+ refs\]\r?\n?$", b"", stderr).strip()
+ stderr = re.sub(br"\[\d+ refs\]\r?\n?", b"", stderr).strip()
return stderr
def args_from_interpreter_flags():
"""Return a list of command-line arguments reproducing the current
- settings in sys.flags."""
- flag_opt_map = {
- 'bytes_warning': 'b',
- 'dont_write_bytecode': 'B',
- 'hash_randomization': 'R',
- 'ignore_environment': 'E',
- 'no_user_site': 's',
- 'no_site': 'S',
- 'optimize': 'O',
- 'verbose': 'v',
- }
- args = []
- for flag, opt in flag_opt_map.items():
- v = getattr(sys.flags, flag)
- if v > 0:
- args.append('-' + opt * v)
- return args
+ settings in sys.flags and sys.warnoptions."""
+ return subprocess._args_from_interpreter_flags()
#============================================================
# Support for assertions about logging.
@@ -1687,6 +1783,40 @@ def skip_unless_symlink(test):
msg = "Requires functional symlink implementation"
return test if ok else unittest.skip(msg)(test)
+_can_xattr = None
+def can_xattr():
+ global _can_xattr
+ if _can_xattr is not None:
+ return _can_xattr
+ if not hasattr(os, "setxattr"):
+ can = False
+ else:
+ tmp_fp, tmp_name = tempfile.mkstemp()
+ try:
+ with open(TESTFN, "wb") as fp:
+ try:
+ # TESTFN & tempfile may use different file systems with
+ # different capabilities
+ os.setxattr(tmp_fp, b"user.test", b"")
+ os.setxattr(fp.fileno(), b"user.test", b"")
+ # Kernels < 2.6.39 don't respect setxattr flags.
+ kernel_version = platform.release()
+ m = re.match("2.6.(\d{1,2})", kernel_version)
+ can = m is None or int(m.group(1)) >= 39
+ except OSError:
+ can = False
+ finally:
+ unlink(TESTFN)
+ unlink(tmp_name)
+ _can_xattr = can
+ return can
+
+def skip_unless_xattr(test):
+ """Skip decorator for tests that require functional extended attributes"""
+ ok = can_xattr()
+ msg = "no non-broken extended attribute support"
+ return test if ok else unittest.skip(msg)(test)
+
def patch(test_instance, object_to_patch, attr_name, new_value):
"""Override 'object_to_patch'.'attr_name' with 'new_value'.
diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py
index 3fadb57..4231f37 100644
--- a/Lib/test/test__locale.py
+++ b/Lib/test/test__locale.py
@@ -1,23 +1,25 @@
-from test.support import run_unittest
from _locale import (setlocale, LC_ALL, LC_CTYPE, LC_NUMERIC, localeconv, Error)
try:
from _locale import (RADIXCHAR, THOUSEP, nl_langinfo)
except ImportError:
nl_langinfo = None
-import unittest
+import codecs
+import locale
import sys
+import unittest
from platform import uname
+from test.support import run_unittest
-if uname()[0] == "Darwin":
- maj, min, mic = [int(part) for part in uname()[2].split(".")]
+if uname().system == "Darwin":
+ maj, min, mic = [int(part) for part in uname().release.split(".")]
if (maj, min, mic) < (8, 0, 0):
raise unittest.SkipTest("locale support broken for OS X < 10.4")
candidate_locales = ['es_UY', 'fr_FR', 'fi_FI', 'es_CO', 'pt_PT', 'it_IT',
'et_EE', 'es_PY', 'no_NO', 'nl_NL', 'lv_LV', 'el_GR', 'be_BY', 'fr_BE',
'ro_RO', 'ru_UA', 'ru_RU', 'es_VE', 'ca_ES', 'se_NO', 'es_EC', 'id_ID',
- 'ka_GE', 'es_CL', 'hu_HU', 'wa_BE', 'lt_LT', 'sl_SI', 'hr_HR', 'es_AR',
+ 'ka_GE', 'es_CL', 'wa_BE', 'hu_HU', 'lt_LT', 'sl_SI', 'hr_HR', 'es_AR',
'es_ES', 'oc_FR', 'gl_ES', 'bg_BG', 'is_IS', 'mk_MK', 'de_AT', 'pt_BR',
'da_DK', 'nn_NO', 'cs_CZ', 'de_LU', 'es_BO', 'sq_AL', 'sk_SK', 'fr_CH',
'de_DE', 'sr_YU', 'br_FR', 'nl_BE', 'sv_FI', 'pl_PL', 'fr_CA', 'fo_FO',
@@ -25,6 +27,31 @@ candidate_locales = ['es_UY', 'fr_FR', 'fi_FI', 'es_CO', 'pt_PT', 'it_IT',
'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', 'en_US',
'es_ES.ISO8859-1', 'fr_FR.ISO8859-15', 'ru_RU.KOI8-R', 'ko_KR.eucKR']
+# Issue #13441: Skip some locales (e.g. cs_CZ and hu_HU) on Solaris to
+# work around an mbstowcs() bug. For example, on Solaris, the hu_HU locale uses
+# the locale encoding ISO-8859-2, the thousands separator is b'\xA0' and it is
+# decoded as U+30000020 (an invalid character) by mbstowcs().
+if sys.platform == 'sunos5':
+ old_locale = locale.setlocale(locale.LC_ALL)
+ try:
+ locales = []
+ for loc in candidate_locales:
+ try:
+ locale.setlocale(locale.LC_ALL, loc)
+ except Error:
+ continue
+ encoding = locale.getpreferredencoding(False)
+ try:
+ localeconv()
+ except Exception as err:
+ print("WARNING: Skip locale %s (encoding %s): [%s] %s"
+ % (loc, encoding, type(err), err))
+ else:
+ locales.append(loc)
+ candidate_locales = locales
+ finally:
+ locale.setlocale(locale.LC_ALL, old_locale)
+
# Workaround for MSVC6(debug) crash bug
if "MSC v.1200" in sys.version:
def accept(loc):
@@ -86,9 +113,10 @@ class _LocaleTests(unittest.TestCase):
setlocale(LC_CTYPE, loc)
except Error:
continue
+ formatting = localeconv()
for lc in ("decimal_point",
"thousands_sep"):
- self.numeric_tester('localeconv', localeconv()[lc], lc, loc)
+ self.numeric_tester('localeconv', formatting[lc], lc, loc)
@unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
def test_lc_numeric_basic(self):
diff --git a/Lib/test/test__osx_support.py b/Lib/test/test__osx_support.py
new file mode 100644
index 0000000..fb159ec
--- /dev/null
+++ b/Lib/test/test__osx_support.py
@@ -0,0 +1,279 @@
+"""
+Test suite for _osx_support: shared OS X support functions.
+"""
+
+import os
+import platform
+import shutil
+import stat
+import sys
+import unittest
+
+import test.support
+
+import _osx_support
+
+@unittest.skipUnless(sys.platform.startswith("darwin"), "requires OS X")
+class Test_OSXSupport(unittest.TestCase):
+
+ def setUp(self):
+ self.maxDiff = None
+ self.prog_name = 'bogus_program_xxxx'
+ self.temp_path_dir = os.path.abspath(os.getcwd())
+ self.env = test.support.EnvironmentVarGuard()
+ self.addCleanup(self.env.__exit__)
+ for cv in ('CFLAGS', 'LDFLAGS', 'CPPFLAGS',
+ 'BASECFLAGS', 'BLDSHARED', 'LDSHARED', 'CC',
+ 'CXX', 'PY_CFLAGS', 'PY_LDFLAGS', 'PY_CPPFLAGS',
+ 'PY_CORE_CFLAGS'):
+ if cv in self.env:
+ self.env.unset(cv)
+
+ def add_expected_saved_initial_values(self, config_vars, expected_vars):
+ # Ensure that the initial values for all modified config vars
+ # are also saved with modified keys.
+ expected_vars.update(('_OSX_SUPPORT_INITIAL_'+ k,
+ config_vars[k]) for k in config_vars
+ if config_vars[k] != expected_vars[k])
+
+ def test__find_executable(self):
+ if self.env['PATH']:
+ self.env['PATH'] = self.env['PATH'] + ':'
+ self.env['PATH'] = self.env['PATH'] + os.path.abspath(self.temp_path_dir)
+ test.support.unlink(self.prog_name)
+ self.assertIsNone(_osx_support._find_executable(self.prog_name))
+ self.addCleanup(test.support.unlink, self.prog_name)
+ with open(self.prog_name, 'w') as f:
+ f.write("#!/bin/sh\n/bin/echo OK\n")
+ os.chmod(self.prog_name, stat.S_IRWXU)
+ self.assertEqual(self.prog_name,
+ _osx_support._find_executable(self.prog_name))
+
+ def test__read_output(self):
+ if self.env['PATH']:
+ self.env['PATH'] = self.env['PATH'] + ':'
+ self.env['PATH'] = self.env['PATH'] + os.path.abspath(self.temp_path_dir)
+ test.support.unlink(self.prog_name)
+ self.addCleanup(test.support.unlink, self.prog_name)
+ with open(self.prog_name, 'w') as f:
+ f.write("#!/bin/sh\n/bin/echo ExpectedOutput\n")
+ os.chmod(self.prog_name, stat.S_IRWXU)
+ self.assertEqual('ExpectedOutput',
+ _osx_support._read_output(self.prog_name))
+
+ def test__find_build_tool(self):
+ out = _osx_support._find_build_tool('cc')
+ self.assertTrue(os.path.isfile(out),
+ 'cc not found - check xcode-select')
+
+ def test__get_system_version(self):
+ self.assertTrue(platform.mac_ver()[0].startswith(
+ _osx_support._get_system_version()))
+
+ def test__remove_original_values(self):
+ config_vars = {
+ 'CC': 'gcc-test -pthreads',
+ }
+ expected_vars = {
+ 'CC': 'clang -pthreads',
+ }
+ cv = 'CC'
+ newvalue = 'clang -pthreads'
+ _osx_support._save_modified_value(config_vars, cv, newvalue)
+ self.assertNotEqual(expected_vars, config_vars)
+ _osx_support._remove_original_values(config_vars)
+ self.assertEqual(expected_vars, config_vars)
+
+ def test__save_modified_value(self):
+ config_vars = {
+ 'CC': 'gcc-test -pthreads',
+ }
+ expected_vars = {
+ 'CC': 'clang -pthreads',
+ }
+ self.add_expected_saved_initial_values(config_vars, expected_vars)
+ cv = 'CC'
+ newvalue = 'clang -pthreads'
+ _osx_support._save_modified_value(config_vars, cv, newvalue)
+ self.assertEqual(expected_vars, config_vars)
+
+ def test__save_modified_value_unchanged(self):
+ config_vars = {
+ 'CC': 'gcc-test -pthreads',
+ }
+ expected_vars = config_vars.copy()
+ cv = 'CC'
+ newvalue = 'gcc-test -pthreads'
+ _osx_support._save_modified_value(config_vars, cv, newvalue)
+ self.assertEqual(expected_vars, config_vars)
+
+ def test__supports_universal_builds(self):
+ import platform
+ self.assertEqual(platform.mac_ver()[0].split('.') >= ['10', '4'],
+ _osx_support._supports_universal_builds())
+
+ def test__find_appropriate_compiler(self):
+ compilers = (
+ ('gcc-test', 'i686-apple-darwin11-llvm-gcc-4.2'),
+ ('clang', 'clang version 3.1'),
+ )
+ config_vars = {
+ 'CC': 'gcc-test -pthreads',
+ 'CXX': 'cc++-test',
+ 'CFLAGS': '-fno-strict-aliasing -g -O3 -arch ppc -arch i386 ',
+ 'LDFLAGS': '-arch ppc -arch i386 -g',
+ 'CPPFLAGS': '-I. -isysroot /Developer/SDKs/MacOSX10.4u.sdk',
+ 'BLDSHARED': 'gcc-test -bundle -arch ppc -arch i386 -g',
+ 'LDSHARED': 'gcc-test -bundle -arch ppc -arch i386 '
+ '-isysroot /Developer/SDKs/MacOSX10.4u.sdk -g',
+ }
+ expected_vars = {
+ 'CC': 'clang -pthreads',
+ 'CXX': 'clang++',
+ 'CFLAGS': '-fno-strict-aliasing -g -O3 -arch ppc -arch i386 ',
+ 'LDFLAGS': '-arch ppc -arch i386 -g',
+ 'CPPFLAGS': '-I. -isysroot /Developer/SDKs/MacOSX10.4u.sdk',
+ 'BLDSHARED': 'clang -bundle -arch ppc -arch i386 -g',
+ 'LDSHARED': 'clang -bundle -arch ppc -arch i386 '
+ '-isysroot /Developer/SDKs/MacOSX10.4u.sdk -g',
+ }
+ self.add_expected_saved_initial_values(config_vars, expected_vars)
+
+ suffix = (':' + self.env['PATH']) if self.env['PATH'] else ''
+ self.env['PATH'] = os.path.abspath(self.temp_path_dir) + suffix
+ for c_name, c_output in compilers:
+ test.support.unlink(c_name)
+ self.addCleanup(test.support.unlink, c_name)
+ with open(c_name, 'w') as f:
+ f.write("#!/bin/sh\n/bin/echo " + c_output)
+ os.chmod(c_name, stat.S_IRWXU)
+ self.assertEqual(expected_vars,
+ _osx_support._find_appropriate_compiler(
+ config_vars))
+
+ def test__remove_universal_flags(self):
+ config_vars = {
+ 'CFLAGS': '-fno-strict-aliasing -g -O3 -arch ppc -arch i386 ',
+ 'LDFLAGS': '-arch ppc -arch i386 -g',
+ 'CPPFLAGS': '-I. -isysroot /Developer/SDKs/MacOSX10.4u.sdk',
+ 'BLDSHARED': 'gcc-4.0 -bundle -arch ppc -arch i386 -g',
+ 'LDSHARED': 'gcc-4.0 -bundle -arch ppc -arch i386 '
+ '-isysroot /Developer/SDKs/MacOSX10.4u.sdk -g',
+ }
+ expected_vars = {
+ 'CFLAGS': '-fno-strict-aliasing -g -O3 ',
+ 'LDFLAGS': ' -g',
+ 'CPPFLAGS': '-I. ',
+ 'BLDSHARED': 'gcc-4.0 -bundle -g',
+ 'LDSHARED': 'gcc-4.0 -bundle -g',
+ }
+ self.add_expected_saved_initial_values(config_vars, expected_vars)
+
+ self.assertEqual(expected_vars,
+ _osx_support._remove_universal_flags(
+ config_vars))
+
+ def test__remove_unsupported_archs(self):
+ config_vars = {
+ 'CC': 'clang',
+ 'CFLAGS': '-fno-strict-aliasing -g -O3 -arch ppc -arch i386 ',
+ 'LDFLAGS': '-arch ppc -arch i386 -g',
+ 'CPPFLAGS': '-I. -isysroot /Developer/SDKs/MacOSX10.4u.sdk',
+ 'BLDSHARED': 'gcc-4.0 -bundle -arch ppc -arch i386 -g',
+ 'LDSHARED': 'gcc-4.0 -bundle -arch ppc -arch i386 '
+ '-isysroot /Developer/SDKs/MacOSX10.4u.sdk -g',
+ }
+ expected_vars = {
+ 'CC': 'clang',
+ 'CFLAGS': '-fno-strict-aliasing -g -O3 -arch i386 ',
+ 'LDFLAGS': ' -arch i386 -g',
+ 'CPPFLAGS': '-I. -isysroot /Developer/SDKs/MacOSX10.4u.sdk',
+ 'BLDSHARED': 'gcc-4.0 -bundle -arch i386 -g',
+ 'LDSHARED': 'gcc-4.0 -bundle -arch i386 '
+ '-isysroot /Developer/SDKs/MacOSX10.4u.sdk -g',
+ }
+ self.add_expected_saved_initial_values(config_vars, expected_vars)
+
+ suffix = (':' + self.env['PATH']) if self.env['PATH'] else ''
+ self.env['PATH'] = os.path.abspath(self.temp_path_dir) + suffix
+ c_name = 'clang'
+ test.support.unlink(c_name)
+ self.addCleanup(test.support.unlink, c_name)
+ # exit status 255 means no PPC support in this compiler chain
+ with open(c_name, 'w') as f:
+ f.write("#!/bin/sh\nexit 255")
+ os.chmod(c_name, stat.S_IRWXU)
+ self.assertEqual(expected_vars,
+ _osx_support._remove_unsupported_archs(
+ config_vars))
+
+ def test__override_all_archs(self):
+ self.env['ARCHFLAGS'] = '-arch x86_64'
+ config_vars = {
+ 'CC': 'clang',
+ 'CFLAGS': '-fno-strict-aliasing -g -O3 -arch ppc -arch i386 ',
+ 'LDFLAGS': '-arch ppc -arch i386 -g',
+ 'CPPFLAGS': '-I. -isysroot /Developer/SDKs/MacOSX10.4u.sdk',
+ 'BLDSHARED': 'gcc-4.0 -bundle -arch ppc -arch i386 -g',
+ 'LDSHARED': 'gcc-4.0 -bundle -arch ppc -arch i386 '
+ '-isysroot /Developer/SDKs/MacOSX10.4u.sdk -g',
+ }
+ expected_vars = {
+ 'CC': 'clang',
+ 'CFLAGS': '-fno-strict-aliasing -g -O3 -arch x86_64',
+ 'LDFLAGS': ' -g -arch x86_64',
+ 'CPPFLAGS': '-I. -isysroot /Developer/SDKs/MacOSX10.4u.sdk',
+ 'BLDSHARED': 'gcc-4.0 -bundle -g -arch x86_64',
+ 'LDSHARED': 'gcc-4.0 -bundle -isysroot '
+ '/Developer/SDKs/MacOSX10.4u.sdk -g -arch x86_64',
+ }
+ self.add_expected_saved_initial_values(config_vars, expected_vars)
+
+ self.assertEqual(expected_vars,
+ _osx_support._override_all_archs(
+ config_vars))
+
+ def test__check_for_unavailable_sdk(self):
+ config_vars = {
+ 'CC': 'clang',
+ 'CFLAGS': '-fno-strict-aliasing -g -O3 -arch ppc -arch i386 '
+ '-isysroot /Developer/SDKs/MacOSX10.1.sdk',
+ 'LDFLAGS': '-arch ppc -arch i386 -g',
+ 'CPPFLAGS': '-I. -isysroot /Developer/SDKs/MacOSX10.1.sdk',
+ 'BLDSHARED': 'gcc-4.0 -bundle -arch ppc -arch i386 -g',
+ 'LDSHARED': 'gcc-4.0 -bundle -arch ppc -arch i386 '
+ '-isysroot /Developer/SDKs/MacOSX10.1.sdk -g',
+ }
+ expected_vars = {
+ 'CC': 'clang',
+ 'CFLAGS': '-fno-strict-aliasing -g -O3 -arch ppc -arch i386 '
+ ' ',
+ 'LDFLAGS': '-arch ppc -arch i386 -g',
+ 'CPPFLAGS': '-I. ',
+ 'BLDSHARED': 'gcc-4.0 -bundle -arch ppc -arch i386 -g',
+ 'LDSHARED': 'gcc-4.0 -bundle -arch ppc -arch i386 '
+ ' -g',
+ }
+ self.add_expected_saved_initial_values(config_vars, expected_vars)
+
+ self.assertEqual(expected_vars,
+ _osx_support._check_for_unavailable_sdk(
+ config_vars))
+
+ def test_get_platform_osx(self):
+ # Note, get_platform_osx is currently tested more extensively
+ # indirectly by test_sysconfig and test_distutils
+ config_vars = {
+ 'CFLAGS': '-fno-strict-aliasing -g -O3 -arch ppc -arch i386 '
+ '-isysroot /Developer/SDKs/MacOSX10.1.sdk',
+ 'MACOSX_DEPLOYMENT_TARGET': '10.6',
+ }
+ result = _osx_support.get_platform_osx(config_vars, ' ', ' ', ' ')
+ self.assertEqual(('macosx', '10.6', 'fat'), result)
+
+def test_main():
+ if sys.platform == 'darwin':
+ test.support.run_unittest(Test_OSXSupport)
+
+if __name__ == "__main__":
+ test_main()
diff --git a/Lib/test/test_abc.py b/Lib/test/test_abc.py
index d86f97c..653c957 100644
--- a/Lib/test/test_abc.py
+++ b/Lib/test/test_abc.py
@@ -10,14 +10,7 @@ import abc
from inspect import isabstract
-class TestABC(unittest.TestCase):
-
- def test_abstractmethod_basics(self):
- @abc.abstractmethod
- def foo(self): pass
- self.assertTrue(foo.__isabstractmethod__)
- def bar(self): pass
- self.assertFalse(hasattr(bar, "__isabstractmethod__"))
+class TestLegacyAPI(unittest.TestCase):
def test_abstractproperty_basics(self):
@abc.abstractproperty
@@ -29,10 +22,12 @@ class TestABC(unittest.TestCase):
class C(metaclass=abc.ABCMeta):
@abc.abstractproperty
def foo(self): return 3
+ self.assertRaises(TypeError, C)
class D(C):
@property
def foo(self): return super().foo
self.assertEqual(D().foo, 3)
+ self.assertFalse(getattr(D.foo, "__isabstractmethod__", False))
def test_abstractclassmethod_basics(self):
@abc.abstractclassmethod
@@ -40,7 +35,7 @@ class TestABC(unittest.TestCase):
self.assertTrue(foo.__isabstractmethod__)
@classmethod
def bar(cls): pass
- self.assertFalse(hasattr(bar, "__isabstractmethod__"))
+ self.assertFalse(getattr(bar, "__isabstractmethod__", False))
class C(metaclass=abc.ABCMeta):
@abc.abstractclassmethod
@@ -58,7 +53,7 @@ class TestABC(unittest.TestCase):
self.assertTrue(foo.__isabstractmethod__)
@staticmethod
def bar(): pass
- self.assertFalse(hasattr(bar, "__isabstractmethod__"))
+ self.assertFalse(getattr(bar, "__isabstractmethod__", False))
class C(metaclass=abc.ABCMeta):
@abc.abstractstaticmethod
@@ -98,6 +93,163 @@ class TestABC(unittest.TestCase):
self.assertRaises(TypeError, F) # because bar is abstract now
self.assertTrue(isabstract(F))
+
+class TestABC(unittest.TestCase):
+
+ def test_abstractmethod_basics(self):
+ @abc.abstractmethod
+ def foo(self): pass
+ self.assertTrue(foo.__isabstractmethod__)
+ def bar(self): pass
+ self.assertFalse(hasattr(bar, "__isabstractmethod__"))
+
+ def test_abstractproperty_basics(self):
+ @property
+ @abc.abstractmethod
+ def foo(self): pass
+ self.assertTrue(foo.__isabstractmethod__)
+ def bar(self): pass
+ self.assertFalse(getattr(bar, "__isabstractmethod__", False))
+
+ class C(metaclass=abc.ABCMeta):
+ @property
+ @abc.abstractmethod
+ def foo(self): return 3
+ self.assertRaises(TypeError, C)
+ class D(C):
+ @C.foo.getter
+ def foo(self): return super().foo
+ self.assertEqual(D().foo, 3)
+
+ def test_abstractclassmethod_basics(self):
+ @classmethod
+ @abc.abstractmethod
+ def foo(cls): pass
+ self.assertTrue(foo.__isabstractmethod__)
+ @classmethod
+ def bar(cls): pass
+ self.assertFalse(getattr(bar, "__isabstractmethod__", False))
+
+ class C(metaclass=abc.ABCMeta):
+ @classmethod
+ @abc.abstractmethod
+ def foo(cls): return cls.__name__
+ self.assertRaises(TypeError, C)
+ class D(C):
+ @classmethod
+ def foo(cls): return super().foo()
+ self.assertEqual(D.foo(), 'D')
+ self.assertEqual(D().foo(), 'D')
+
+ def test_abstractstaticmethod_basics(self):
+ @staticmethod
+ @abc.abstractmethod
+ def foo(): pass
+ self.assertTrue(foo.__isabstractmethod__)
+ @staticmethod
+ def bar(): pass
+ self.assertFalse(getattr(bar, "__isabstractmethod__", False))
+
+ class C(metaclass=abc.ABCMeta):
+ @staticmethod
+ @abc.abstractmethod
+ def foo(): return 3
+ self.assertRaises(TypeError, C)
+ class D(C):
+ @staticmethod
+ def foo(): return 4
+ self.assertEqual(D.foo(), 4)
+ self.assertEqual(D().foo(), 4)
+
+ def test_abstractmethod_integration(self):
+ for abstractthing in [abc.abstractmethod, abc.abstractproperty,
+ abc.abstractclassmethod,
+ abc.abstractstaticmethod]:
+ class C(metaclass=abc.ABCMeta):
+ @abstractthing
+ def foo(self): pass # abstract
+ def bar(self): pass # concrete
+ self.assertEqual(C.__abstractmethods__, {"foo"})
+ self.assertRaises(TypeError, C) # because foo is abstract
+ self.assertTrue(isabstract(C))
+ class D(C):
+ def bar(self): pass # concrete override of concrete
+ self.assertEqual(D.__abstractmethods__, {"foo"})
+ self.assertRaises(TypeError, D) # because foo is still abstract
+ self.assertTrue(isabstract(D))
+ class E(D):
+ def foo(self): pass
+ self.assertEqual(E.__abstractmethods__, set())
+ E() # now foo is concrete, too
+ self.assertFalse(isabstract(E))
+ class F(E):
+ @abstractthing
+ def bar(self): pass # abstract override of concrete
+ self.assertEqual(F.__abstractmethods__, {"bar"})
+ self.assertRaises(TypeError, F) # because bar is abstract now
+ self.assertTrue(isabstract(F))
+
+ def test_descriptors_with_abstractmethod(self):
+ class C(metaclass=abc.ABCMeta):
+ @property
+ @abc.abstractmethod
+ def foo(self): return 3
+ @foo.setter
+ @abc.abstractmethod
+ def foo(self, val): pass
+ self.assertRaises(TypeError, C)
+ class D(C):
+ @C.foo.getter
+ def foo(self): return super().foo
+ self.assertRaises(TypeError, D)
+ class E(D):
+ @D.foo.setter
+ def foo(self, val): pass
+ self.assertEqual(E().foo, 3)
+ # check that the property's __isabstractmethod__ descriptor does the
+ # right thing when presented with a value that fails truth testing:
+ class NotBool(object):
+ def __nonzero__(self):
+ raise ValueError()
+ __len__ = __nonzero__
+ with self.assertRaises(ValueError):
+ class F(C):
+ def bar(self):
+ pass
+ bar.__isabstractmethod__ = NotBool()
+ foo = property(bar)
+
+
+ def test_customdescriptors_with_abstractmethod(self):
+ class Descriptor:
+ def __init__(self, fget, fset=None):
+ self._fget = fget
+ self._fset = fset
+ def getter(self, callable):
+ return Descriptor(callable, self._fget)
+ def setter(self, callable):
+ return Descriptor(self._fget, callable)
+ @property
+ def __isabstractmethod__(self):
+ return (getattr(self._fget, '__isabstractmethod__', False)
+ or getattr(self._fset, '__isabstractmethod__', False))
+ class C(metaclass=abc.ABCMeta):
+ @Descriptor
+ @abc.abstractmethod
+ def foo(self): return 3
+ @foo.setter
+ @abc.abstractmethod
+ def foo(self, val): pass
+ self.assertRaises(TypeError, C)
+ class D(C):
+ @C.foo.getter
+ def foo(self): return super().foo
+ self.assertRaises(TypeError, D)
+ class E(D):
+ @D.foo.setter
+ def foo(self, val): pass
+ self.assertFalse(E.foo.__isabstractmethod__)
+
def test_metaclass_abc(self):
# Metaclasses can be ABCs, too.
class A(metaclass=abc.ABCMeta):
@@ -121,11 +273,32 @@ class TestABC(unittest.TestCase):
self.assertFalse(issubclass(B, (A,)))
self.assertNotIsInstance(b, A)
self.assertNotIsInstance(b, (A,))
- A.register(B)
+ B1 = A.register(B)
+ self.assertTrue(issubclass(B, A))
+ self.assertTrue(issubclass(B, (A,)))
+ self.assertIsInstance(b, A)
+ self.assertIsInstance(b, (A,))
+ self.assertIs(B1, B)
+ class C(B):
+ pass
+ c = C()
+ self.assertTrue(issubclass(C, A))
+ self.assertTrue(issubclass(C, (A,)))
+ self.assertIsInstance(c, A)
+ self.assertIsInstance(c, (A,))
+
+ def test_register_as_class_deco(self):
+ class A(metaclass=abc.ABCMeta):
+ pass
+ @A.register
+ class B(object):
+ pass
+ b = B()
self.assertTrue(issubclass(B, A))
self.assertTrue(issubclass(B, (A,)))
self.assertIsInstance(b, A)
self.assertIsInstance(b, (A,))
+ @A.register
class C(B):
pass
c = C()
@@ -133,6 +306,7 @@ class TestABC(unittest.TestCase):
self.assertTrue(issubclass(C, (A,)))
self.assertIsInstance(c, A)
self.assertIsInstance(c, (A,))
+ self.assertIs(C, A.register(C))
def test_isinstance_invalidation(self):
class A(metaclass=abc.ABCMeta):
diff --git a/Lib/test/test_abstract_numbers.py b/Lib/test/test_abstract_numbers.py
index 2a396cd..253e6f0 100644
--- a/Lib/test/test_abstract_numbers.py
+++ b/Lib/test/test_abstract_numbers.py
@@ -14,6 +14,7 @@ class TestNumbers(unittest.TestCase):
self.assertEqual(7, int(7).real)
self.assertEqual(0, int(7).imag)
self.assertEqual(7, int(7).conjugate())
+ self.assertEqual(-7, int(-7).conjugate())
self.assertEqual(7, int(7).numerator)
self.assertEqual(1, int(7).denominator)
@@ -24,6 +25,7 @@ class TestNumbers(unittest.TestCase):
self.assertEqual(7.3, float(7.3).real)
self.assertEqual(0, float(7.3).imag)
self.assertEqual(7.3, float(7.3).conjugate())
+ self.assertEqual(-7.3, float(-7.3).conjugate())
def test_complex(self):
self.assertFalse(issubclass(complex, Real))
diff --git a/Lib/test/test_aifc.py b/Lib/test/test_aifc.py
index ee4ad6b..ad6f610 100644
--- a/Lib/test/test_aifc.py
+++ b/Lib/test/test_aifc.py
@@ -1,4 +1,4 @@
-from test.support import findfile, run_unittest, TESTFN, captured_stdout, unlink
+from test.support import findfile, run_unittest, TESTFN, unlink
import unittest
import os
import io
@@ -207,11 +207,8 @@ class AIFCLowLevelTest(unittest.TestCase):
b += b'COMM' + struct.pack('>LhlhhLL', 18, 0, 0, 0, 0, 0, 0)
b += b'SSND' + struct.pack('>L', 8) + b'\x00' * 8
b += b'MARK' + struct.pack('>LhB', 3, 1, 1)
- with captured_stdout() as s:
+ with self.assertWarns(UserWarning):
f = aifc.open(io.BytesIO(b))
- self.assertEqual(
- s.getvalue(),
- 'Warning: MARK chunk contains only 0 markers instead of 1\n')
self.assertEqual(f.getmarkers(), None)
def test_read_comm_kludge_compname_even(self):
@@ -219,9 +216,8 @@ class AIFCLowLevelTest(unittest.TestCase):
b += b'COMM' + struct.pack('>LhlhhLL', 18, 0, 0, 0, 0, 0, 0)
b += b'NONE' + struct.pack('B', 4) + b'even' + b'\x00'
b += b'SSND' + struct.pack('>L', 8) + b'\x00' * 8
- with captured_stdout() as s:
+ with self.assertWarns(UserWarning):
f = aifc.open(io.BytesIO(b))
- self.assertEqual(s.getvalue(), 'Warning: bad COMM chunk size\n')
self.assertEqual(f.getcompname(), b'even')
def test_read_comm_kludge_compname_odd(self):
@@ -229,9 +225,8 @@ class AIFCLowLevelTest(unittest.TestCase):
b += b'COMM' + struct.pack('>LhlhhLL', 18, 0, 0, 0, 0, 0, 0)
b += b'NONE' + struct.pack('B', 3) + b'odd'
b += b'SSND' + struct.pack('>L', 8) + b'\x00' * 8
- with captured_stdout() as s:
+ with self.assertWarns(UserWarning):
f = aifc.open(io.BytesIO(b))
- self.assertEqual(s.getvalue(), 'Warning: bad COMM chunk size\n')
self.assertEqual(f.getcompname(), b'odd')
def test_write_params_raises(self):
diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py
index 22c26cc..c06c940 100644
--- a/Lib/test/test_argparse.py
+++ b/Lib/test/test_argparse.py
@@ -1323,20 +1323,21 @@ class TestParserDefaultSuppress(ParserTestCase):
class TestParserDefault42(ParserTestCase):
"""Test actions with a parser-level default of 42"""
- parser_signature = Sig(argument_default=42, version='1.0')
+ parser_signature = Sig(argument_default=42)
argument_signatures = [
+ Sig('--version', action='version', version='1.0'),
Sig('foo', nargs='?'),
Sig('bar', nargs='*'),
Sig('--baz', action='store_true'),
]
failures = ['-x']
successes = [
- ('', NS(foo=42, bar=42, baz=42)),
- ('a', NS(foo='a', bar=42, baz=42)),
- ('a b', NS(foo='a', bar=['b'], baz=42)),
- ('--baz', NS(foo=42, bar=42, baz=True)),
- ('a --baz', NS(foo='a', bar=42, baz=True)),
- ('--baz a b', NS(foo='a', bar=['b'], baz=True)),
+ ('', NS(foo=42, bar=42, baz=42, version=42)),
+ ('a', NS(foo='a', bar=42, baz=42, version=42)),
+ ('a b', NS(foo='a', bar=['b'], baz=42, version=42)),
+ ('--baz', NS(foo=42, bar=42, baz=True, version=42)),
+ ('a --baz', NS(foo='a', bar=42, baz=True, version=42)),
+ ('--baz a b', NS(foo='a', bar=['b'], baz=True, version=42)),
]
@@ -2927,10 +2928,9 @@ class TestHelpFormattingMetaclass(type):
parser_text = sfile.getvalue()
self._test(tester, parser_text)
- # add tests for {format,print}_{usage,help,version}
+ # add tests for {format,print}_{usage,help}
for func_suffix, std_name in [('usage', 'stdout'),
- ('help', 'stdout'),
- ('version', 'stderr')]:
+ ('help', 'stdout')]:
AddTests(cls, func_suffix, std_name)
bases = TestCase,
@@ -2941,8 +2941,9 @@ class TestHelpBiggerOptionals(HelpTestCase):
"""Make sure that argument help aligns when options are longer"""
parser_signature = Sig(prog='PROG', description='DESCRIPTION',
- epilog='EPILOG', version='0.1')
+ epilog='EPILOG')
argument_signatures = [
+ Sig('-v', '--version', action='version', version='0.1'),
Sig('-x', action='store_true', help='X HELP'),
Sig('--y', help='Y HELP'),
Sig('foo', help='FOO HELP'),
@@ -2977,8 +2978,9 @@ class TestHelpBiggerOptionalGroups(HelpTestCase):
"""Make sure that argument help aligns when options are longer"""
parser_signature = Sig(prog='PROG', description='DESCRIPTION',
- epilog='EPILOG', version='0.1')
+ epilog='EPILOG')
argument_signatures = [
+ Sig('-v', '--version', action='version', version='0.1'),
Sig('-x', action='store_true', help='X HELP'),
Sig('--y', help='Y HELP'),
Sig('foo', help='FOO HELP'),
@@ -3145,9 +3147,9 @@ HHAAHHH
class TestHelpWrappingLongNames(HelpTestCase):
"""Make sure that text after long names starts on the next line"""
- parser_signature = Sig(usage='USAGE', description= 'D D' * 30,
- version='V V'*30)
+ parser_signature = Sig(usage='USAGE', description= 'D D' * 30)
argument_signatures = [
+ Sig('-v', '--version', action='version', version='V V' * 30),
Sig('-x', metavar='X' * 25, help='XH XH' * 20),
Sig('y', metavar='y' * 25, help='YH YH' * 20),
]
@@ -3750,8 +3752,9 @@ class TestHelpNoHelpOptional(HelpTestCase):
class TestHelpVersionOptional(HelpTestCase):
"""Test that the --version argument can be suppressed help messages"""
- parser_signature = Sig(prog='PROG', version='1.0')
+ parser_signature = Sig(prog='PROG')
argument_signatures = [
+ Sig('-v', '--version', action='version', version='1.0'),
Sig('--foo', help='foo help'),
Sig('spam', help='spam help'),
]
@@ -3984,8 +3987,8 @@ class TestHelpVersionAction(HelpTestCase):
class TestHelpSubparsersOrdering(HelpTestCase):
"""Test ordering of subcommands in help matches the code"""
parser_signature = Sig(prog='PROG',
- description='display some subcommands',
- version='0.1')
+ description='display some subcommands')
+ argument_signatures = [Sig('-v', '--version', action='version', version='0.1')]
subparsers_signatures = [Sig(name=name)
for name in ('a', 'b', 'c', 'd', 'e')]
@@ -4013,8 +4016,8 @@ class TestHelpSubparsersOrdering(HelpTestCase):
class TestHelpSubparsersWithHelpOrdering(HelpTestCase):
"""Test ordering of subcommands in help matches the code"""
parser_signature = Sig(prog='PROG',
- description='display some subcommands',
- version='0.1')
+ description='display some subcommands')
+ argument_signatures = [Sig('-v', '--version', action='version', version='0.1')]
subcommand_data = (('a', 'a subcommand help'),
('b', 'b subcommand help'),
@@ -4052,6 +4055,37 @@ class TestHelpSubparsersWithHelpOrdering(HelpTestCase):
'''
+
+class TestHelpMetavarTypeFormatter(HelpTestCase):
+ """"""
+
+ def custom_type(string):
+ return string
+
+ parser_signature = Sig(prog='PROG', description='description',
+ formatter_class=argparse.MetavarTypeHelpFormatter)
+ argument_signatures = [Sig('a', type=int),
+ Sig('-b', type=custom_type),
+ Sig('-c', type=float, metavar='SOME FLOAT')]
+ argument_group_signatures = []
+ usage = '''\
+ usage: PROG [-h] [-b custom_type] [-c SOME FLOAT] int
+ '''
+ help = usage + '''\
+
+ description
+
+ positional arguments:
+ int
+
+ optional arguments:
+ -h, --help show this help message and exit
+ -b custom_type
+ -c SOME FLOAT
+ '''
+ version = ''
+
+
# =====================================
# Optional/Positional constructor tests
# =====================================
@@ -4280,32 +4314,28 @@ class TestOptionalsHelpVersionActions(TestCase):
parser.format_help(),
self._get_error(parser.parse_args, args_str.split()).stdout)
- def assertPrintVersionExit(self, parser, args_str):
- self.assertEqual(
- parser.format_version(),
- self._get_error(parser.parse_args, args_str.split()).stderr)
-
def assertArgumentParserError(self, parser, *args):
self.assertRaises(ArgumentParserError, parser.parse_args, args)
def test_version(self):
- parser = ErrorRaisingArgumentParser(version='1.0')
+ parser = ErrorRaisingArgumentParser()
+ parser.add_argument('-v', '--version', action='version', version='1.0')
self.assertPrintHelpExit(parser, '-h')
self.assertPrintHelpExit(parser, '--help')
- self.assertPrintVersionExit(parser, '-v')
- self.assertPrintVersionExit(parser, '--version')
+ self.assertRaises(AttributeError, getattr, parser, 'format_version')
def test_version_format(self):
- parser = ErrorRaisingArgumentParser(prog='PPP', version='%(prog)s 3.5')
+ parser = ErrorRaisingArgumentParser(prog='PPP')
+ parser.add_argument('-v', '--version', action='version', version='%(prog)s 3.5')
msg = self._get_error(parser.parse_args, ['-v']).stderr
self.assertEqual('PPP 3.5\n', msg)
def test_version_no_help(self):
- parser = ErrorRaisingArgumentParser(add_help=False, version='1.0')
+ parser = ErrorRaisingArgumentParser(add_help=False)
+ parser.add_argument('-v', '--version', action='version', version='1.0')
self.assertArgumentParserError(parser, '-h')
self.assertArgumentParserError(parser, '--help')
- self.assertPrintVersionExit(parser, '-v')
- self.assertPrintVersionExit(parser, '--version')
+ self.assertRaises(AttributeError, getattr, parser, 'format_version')
def test_version_action(self):
parser = ErrorRaisingArgumentParser(prog='XXX')
@@ -4325,12 +4355,13 @@ class TestOptionalsHelpVersionActions(TestCase):
parser.add_argument('-x', action='help')
parser.add_argument('-y', action='version')
self.assertPrintHelpExit(parser, '-x')
- self.assertPrintVersionExit(parser, '-y')
self.assertArgumentParserError(parser, '-v')
self.assertArgumentParserError(parser, '--version')
+ self.assertRaises(AttributeError, getattr, parser, 'format_version')
def test_help_version_extra_arguments(self):
- parser = ErrorRaisingArgumentParser(version='1.0')
+ parser = ErrorRaisingArgumentParser()
+ parser.add_argument('--version', action='version', version='1.0')
parser.add_argument('-x', action='store_true')
parser.add_argument('y')
@@ -4342,8 +4373,7 @@ class TestOptionalsHelpVersionActions(TestCase):
format = '%s %%s %s' % (prefix, suffix)
self.assertPrintHelpExit(parser, format % '-h')
self.assertPrintHelpExit(parser, format % '--help')
- self.assertPrintVersionExit(parser, format % '-v')
- self.assertPrintVersionExit(parser, format % '--version')
+ self.assertRaises(AttributeError, getattr, parser, 'format_version')
# ======================
@@ -4398,7 +4428,7 @@ class TestStrings(TestCase):
parser = argparse.ArgumentParser(prog='PROG')
string = (
"ArgumentParser(prog='PROG', usage=None, description=None, "
- "version=None, formatter_class=%r, conflict_handler='error', "
+ "formatter_class=%r, conflict_handler='error', "
"add_help=True)" % argparse.HelpFormatter)
self.assertStringEqual(parser, string)
@@ -4442,7 +4472,7 @@ class TestEncoding(TestCase):
def _test_module_encoding(self, path):
path, _ = os.path.splitext(path)
path += ".py"
- with codecs.open(path, 'r', 'utf8') as f:
+ with codecs.open(path, 'r', 'utf-8') as f:
f.read()
def test_argparse_module_encoding(self):
@@ -4484,6 +4514,67 @@ class TestArgumentTypeError(TestCase):
else:
self.fail()
+# =========================
+# MessageContentError tests
+# =========================
+
+class TestMessageContentError(TestCase):
+
+ def test_missing_argument_name_in_message(self):
+ parser = ErrorRaisingArgumentParser(prog='PROG', usage='')
+ parser.add_argument('req_pos', type=str)
+ parser.add_argument('-req_opt', type=int, required=True)
+ parser.add_argument('need_one', type=str, nargs='+')
+
+ with self.assertRaises(ArgumentParserError) as cm:
+ parser.parse_args([])
+ msg = str(cm.exception)
+ self.assertRegex(msg, 'req_pos')
+ self.assertRegex(msg, 'req_opt')
+ self.assertRegex(msg, 'need_one')
+ with self.assertRaises(ArgumentParserError) as cm:
+ parser.parse_args(['myXargument'])
+ msg = str(cm.exception)
+ self.assertNotIn(msg, 'req_pos')
+ self.assertRegex(msg, 'req_opt')
+ self.assertRegex(msg, 'need_one')
+ with self.assertRaises(ArgumentParserError) as cm:
+ parser.parse_args(['myXargument', '-req_opt=1'])
+ msg = str(cm.exception)
+ self.assertNotIn(msg, 'req_pos')
+ self.assertNotIn(msg, 'req_opt')
+ self.assertRegex(msg, 'need_one')
+
+ def test_optional_optional_not_in_message(self):
+ parser = ErrorRaisingArgumentParser(prog='PROG', usage='')
+ parser.add_argument('req_pos', type=str)
+ parser.add_argument('--req_opt', type=int, required=True)
+ parser.add_argument('--opt_opt', type=bool, nargs='?',
+ default=True)
+ with self.assertRaises(ArgumentParserError) as cm:
+ parser.parse_args([])
+ msg = str(cm.exception)
+ self.assertRegex(msg, 'req_pos')
+ self.assertRegex(msg, 'req_opt')
+ self.assertNotIn(msg, 'opt_opt')
+ with self.assertRaises(ArgumentParserError) as cm:
+ parser.parse_args(['--req_opt=1'])
+ msg = str(cm.exception)
+ self.assertRegex(msg, 'req_pos')
+ self.assertNotIn(msg, 'req_opt')
+ self.assertNotIn(msg, 'opt_opt')
+
+ def test_optional_positional_not_in_message(self):
+ parser = ErrorRaisingArgumentParser(prog='PROG', usage='')
+ parser.add_argument('req_pos')
+ parser.add_argument('optional_positional', nargs='?', default='eggs')
+ with self.assertRaises(ArgumentParserError) as cm:
+ parser.parse_args([])
+ msg = str(cm.exception)
+ self.assertRegex(msg, 'req_pos')
+ self.assertNotIn(msg, 'optional_positional')
+
+
# ================================================
# Check that the type function is called only once
# ================================================
@@ -4782,13 +4873,7 @@ class TestImportStar(TestCase):
self.assertEqual(sorted(items), sorted(argparse.__all__))
def test_main():
- # silence warnings about version argument - these are expected
- with support.check_warnings(
- ('The "version" argument to ArgumentParser is deprecated.',
- DeprecationWarning),
- ('The (format|print)_version method is deprecated',
- DeprecationWarning)):
- support.run_unittest(__name__)
+ support.run_unittest(__name__)
# Remove global references to avoid looking like we have refleaks.
RFile.seen = {}
WFile.seen = set()
diff --git a/Lib/test/test_array.py b/Lib/test/test_array.py
index e26e9ad..544c2ce 100755
--- a/Lib/test/test_array.py
+++ b/Lib/test/test_array.py
@@ -16,6 +16,13 @@ import warnings
import array
from array import _array_reconstructor as array_reconstructor
+try:
+ # Try to determine availability of long long independently
+ # of the array module under test
+ struct.calcsize('@q')
+ have_long_long = True
+except struct.error:
+ have_long_long = False
class ArraySubclass(array.array):
pass
@@ -26,6 +33,8 @@ class ArraySubclassWithKwargs(array.array):
tests = [] # list to accumulate all tests
typecodes = "ubBhHiIlLfd"
+if have_long_long:
+ typecodes += 'qQ'
class BadConstructorTest(unittest.TestCase):
@@ -209,10 +218,14 @@ class BaseTest(unittest.TestCase):
self.assertEqual(bi[1], len(a))
def test_byteswap(self):
- a = array.array(self.typecode, self.example)
+ if self.typecode == 'u':
+ example = '\U00100100'
+ else:
+ example = self.example
+ a = array.array(self.typecode, example)
self.assertRaises(TypeError, a.byteswap, 42)
if a.itemsize in (1, 2, 4, 8):
- b = array.array(self.typecode, self.example)
+ b = array.array(self.typecode, example)
b.byteswap()
if a.itemsize==1:
self.assertEqual(a, b)
@@ -272,6 +285,20 @@ class BaseTest(unittest.TestCase):
self.assertEqual(a.x, b.x)
self.assertEqual(type(a), type(b))
+ def test_iterator_pickle(self):
+ data = array.array(self.typecode, self.example)
+ orgit = iter(data)
+ d = pickle.dumps(orgit)
+ it = pickle.loads(d)
+ self.assertEqual(type(orgit), type(it))
+ self.assertEqual(list(it), list(data))
+
+ if len(data):
+ it = pickle.loads(d)
+ next(it)
+ d = pickle.dumps(it)
+ self.assertEqual(list(it), list(data)[1:])
+
def test_insert(self):
a = array.array(self.typecode, self.example)
a.insert(0, self.example[0])
@@ -991,14 +1018,14 @@ class BaseTest(unittest.TestCase):
@support.cpython_only
def test_sizeof_with_buffer(self):
a = array.array(self.typecode, self.example)
- basesize = support.calcvobjsize('4Pi')
+ basesize = support.calcvobjsize('Pn2Pi')
buffer_size = a.buffer_info()[1] * a.itemsize
support.check_sizeof(self, a, basesize + buffer_size)
@support.cpython_only
def test_sizeof_without_buffer(self):
a = array.array(self.typecode)
- basesize = support.calcvobjsize('4Pi')
+ basesize = support.calcvobjsize('Pn2Pi')
support.check_sizeof(self, a, basesize)
@@ -1018,6 +1045,16 @@ class UnicodeTest(StringTest):
minitemsize = 2
def test_unicode(self):
+ try:
+ import ctypes
+ sizeof_wchar = ctypes.sizeof(ctypes.c_wchar)
+ except ImportError:
+ import sys
+ if sys.platform == 'win32':
+ sizeof_wchar = 2
+ else:
+ sizeof_wchar = 4
+
self.assertRaises(TypeError, array.array, 'b', 'foo')
a = array.array('u', '\xa0\xc2\u1234')
@@ -1027,6 +1064,7 @@ class UnicodeTest(StringTest):
a.fromunicode('\x11abc\xff\u1234')
s = a.tounicode()
self.assertEqual(s, '\xa0\xc2\u1234 \x11abc\xff\u1234')
+ self.assertEqual(a.itemsize, sizeof_wchar)
s = '\x00="\'a\\b\x80\xff\u0000\u0001\u1234'
a = array.array('u', s)
@@ -1218,6 +1256,18 @@ class UnsignedLongTest(UnsignedNumberTest):
minitemsize = 4
tests.append(UnsignedLongTest)
+@unittest.skipIf(not have_long_long, 'need long long support')
+class LongLongTest(SignedNumberTest):
+ typecode = 'q'
+ minitemsize = 8
+tests.append(LongLongTest)
+
+@unittest.skipIf(not have_long_long, 'need long long support')
+class UnsignedLongLongTest(UnsignedNumberTest):
+ typecode = 'Q'
+ minitemsize = 8
+tests.append(UnsignedLongLongTest)
+
class FPTest(NumberTest):
example = [-42.0, 0, 42, 1e5, -1e10]
smallerexample = [-42.0, 0, 42, 1e5, -2e10]
diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py
index 2887092..a8853c7 100644
--- a/Lib/test/test_ast.py
+++ b/Lib/test/test_ast.py
@@ -1,6 +1,10 @@
-import sys, unittest
-from test import support
+import os
+import sys
+import unittest
import ast
+import weakref
+
+from test import support
def to_tuple(t):
if t is None or isinstance(t, (str, int, complex)):
@@ -52,6 +56,9 @@ exec_tests = [
"while v:pass",
# If
"if v:pass",
+ # With
+ "with x as y: pass",
+ "with x as y, z as q: pass",
# Raise
"raise Exception('string')",
# TryExcept
@@ -191,6 +198,9 @@ class AST_Tests(unittest.TestCase):
def test_AST_objects(self):
x = ast.AST()
self.assertEqual(x._fields, ())
+ x.foobar = 42
+ self.assertEqual(x.foobar, 42)
+ self.assertEqual(x.__dict__["foobar"], 42)
with self.assertRaises(AttributeError):
x.vararg
@@ -199,6 +209,17 @@ class AST_Tests(unittest.TestCase):
# "_ast.AST constructor takes 0 positional arguments"
ast.AST(2)
+ def test_AST_garbage_collection(self):
+ class X:
+ pass
+ a = ast.AST()
+ a.x = X()
+ a.x.a = a
+ ref = weakref.ref(a.x)
+ del a
+ support.gc_collect()
+ self.assertIsNone(ref())
+
def test_snippets(self):
for input, output, kind in ((exec_tests, exec_results, "exec"),
(single_tests, single_results, "single"),
@@ -390,7 +411,9 @@ class ASTHelpers_Test(unittest.TestCase):
try:
1/0
except Exception:
- self.assertRaises(SyntaxError, ast.parse, r"'\U'")
+ with self.assertRaises(SyntaxError) as e:
+ ast.literal_eval(r"'\U'")
+ self.assertIsNotNone(e.exception.__context__)
def test_dump(self):
node = ast.parse('spam(eggs, "and cheese")')
@@ -504,8 +527,413 @@ class ASTHelpers_Test(unittest.TestCase):
self.assertIn("invalid integer value: None", str(cm.exception))
+class ASTValidatorTests(unittest.TestCase):
+
+ def mod(self, mod, msg=None, mode="exec", *, exc=ValueError):
+ mod.lineno = mod.col_offset = 0
+ ast.fix_missing_locations(mod)
+ with self.assertRaises(exc) as cm:
+ compile(mod, "<test>", mode)
+ if msg is not None:
+ self.assertIn(msg, str(cm.exception))
+
+ def expr(self, node, msg=None, *, exc=ValueError):
+ mod = ast.Module([ast.Expr(node)])
+ self.mod(mod, msg, exc=exc)
+
+ def stmt(self, stmt, msg=None):
+ mod = ast.Module([stmt])
+ self.mod(mod, msg)
+
+ def test_module(self):
+ m = ast.Interactive([ast.Expr(ast.Name("x", ast.Store()))])
+ self.mod(m, "must have Load context", "single")
+ m = ast.Expression(ast.Name("x", ast.Store()))
+ self.mod(m, "must have Load context", "eval")
+
+ def _check_arguments(self, fac, check):
+ def arguments(args=None, vararg=None, varargannotation=None,
+ kwonlyargs=None, kwarg=None, kwargannotation=None,
+ defaults=None, kw_defaults=None):
+ if args is None:
+ args = []
+ if kwonlyargs is None:
+ kwonlyargs = []
+ if defaults is None:
+ defaults = []
+ if kw_defaults is None:
+ kw_defaults = []
+ args = ast.arguments(args, vararg, varargannotation, kwonlyargs,
+ kwarg, kwargannotation, defaults, kw_defaults)
+ return fac(args)
+ args = [ast.arg("x", ast.Name("x", ast.Store()))]
+ check(arguments(args=args), "must have Load context")
+ check(arguments(varargannotation=ast.Num(3)),
+ "varargannotation but no vararg")
+ check(arguments(varargannotation=ast.Name("x", ast.Store()), vararg="x"),
+ "must have Load context")
+ check(arguments(kwonlyargs=args), "must have Load context")
+ check(arguments(kwargannotation=ast.Num(42)),
+ "kwargannotation but no kwarg")
+ check(arguments(kwargannotation=ast.Name("x", ast.Store()),
+ kwarg="x"), "must have Load context")
+ check(arguments(defaults=[ast.Num(3)]),
+ "more positional defaults than args")
+ check(arguments(kw_defaults=[ast.Num(4)]),
+ "length of kwonlyargs is not the same as kw_defaults")
+ args = [ast.arg("x", ast.Name("x", ast.Load()))]
+ check(arguments(args=args, defaults=[ast.Name("x", ast.Store())]),
+ "must have Load context")
+ args = [ast.arg("a", ast.Name("x", ast.Load())),
+ ast.arg("b", ast.Name("y", ast.Load()))]
+ check(arguments(kwonlyargs=args,
+ kw_defaults=[None, ast.Name("x", ast.Store())]),
+ "must have Load context")
+
+ def test_funcdef(self):
+ a = ast.arguments([], None, None, [], None, None, [], [])
+ f = ast.FunctionDef("x", a, [], [], None)
+ self.stmt(f, "empty body on FunctionDef")
+ f = ast.FunctionDef("x", a, [ast.Pass()], [ast.Name("x", ast.Store())],
+ None)
+ self.stmt(f, "must have Load context")
+ f = ast.FunctionDef("x", a, [ast.Pass()], [],
+ ast.Name("x", ast.Store()))
+ self.stmt(f, "must have Load context")
+ def fac(args):
+ return ast.FunctionDef("x", args, [ast.Pass()], [], None)
+ self._check_arguments(fac, self.stmt)
+
+ def test_classdef(self):
+ def cls(bases=None, keywords=None, starargs=None, kwargs=None,
+ body=None, decorator_list=None):
+ if bases is None:
+ bases = []
+ if keywords is None:
+ keywords = []
+ if body is None:
+ body = [ast.Pass()]
+ if decorator_list is None:
+ decorator_list = []
+ return ast.ClassDef("myclass", bases, keywords, starargs,
+ kwargs, body, decorator_list)
+ self.stmt(cls(bases=[ast.Name("x", ast.Store())]),
+ "must have Load context")
+ self.stmt(cls(keywords=[ast.keyword("x", ast.Name("x", ast.Store()))]),
+ "must have Load context")
+ self.stmt(cls(starargs=ast.Name("x", ast.Store())),
+ "must have Load context")
+ self.stmt(cls(kwargs=ast.Name("x", ast.Store())),
+ "must have Load context")
+ self.stmt(cls(body=[]), "empty body on ClassDef")
+ self.stmt(cls(body=[None]), "None disallowed")
+ self.stmt(cls(decorator_list=[ast.Name("x", ast.Store())]),
+ "must have Load context")
+
+ def test_delete(self):
+ self.stmt(ast.Delete([]), "empty targets on Delete")
+ self.stmt(ast.Delete([None]), "None disallowed")
+ self.stmt(ast.Delete([ast.Name("x", ast.Load())]),
+ "must have Del context")
+
+ def test_assign(self):
+ self.stmt(ast.Assign([], ast.Num(3)), "empty targets on Assign")
+ self.stmt(ast.Assign([None], ast.Num(3)), "None disallowed")
+ self.stmt(ast.Assign([ast.Name("x", ast.Load())], ast.Num(3)),
+ "must have Store context")
+ self.stmt(ast.Assign([ast.Name("x", ast.Store())],
+ ast.Name("y", ast.Store())),
+ "must have Load context")
+
+ def test_augassign(self):
+ aug = ast.AugAssign(ast.Name("x", ast.Load()), ast.Add(),
+ ast.Name("y", ast.Load()))
+ self.stmt(aug, "must have Store context")
+ aug = ast.AugAssign(ast.Name("x", ast.Store()), ast.Add(),
+ ast.Name("y", ast.Store()))
+ self.stmt(aug, "must have Load context")
+
+ def test_for(self):
+ x = ast.Name("x", ast.Store())
+ y = ast.Name("y", ast.Load())
+ p = ast.Pass()
+ self.stmt(ast.For(x, y, [], []), "empty body on For")
+ self.stmt(ast.For(ast.Name("x", ast.Load()), y, [p], []),
+ "must have Store context")
+ self.stmt(ast.For(x, ast.Name("y", ast.Store()), [p], []),
+ "must have Load context")
+ e = ast.Expr(ast.Name("x", ast.Store()))
+ self.stmt(ast.For(x, y, [e], []), "must have Load context")
+ self.stmt(ast.For(x, y, [p], [e]), "must have Load context")
+
+ def test_while(self):
+ self.stmt(ast.While(ast.Num(3), [], []), "empty body on While")
+ self.stmt(ast.While(ast.Name("x", ast.Store()), [ast.Pass()], []),
+ "must have Load context")
+ self.stmt(ast.While(ast.Num(3), [ast.Pass()],
+ [ast.Expr(ast.Name("x", ast.Store()))]),
+ "must have Load context")
+
+ def test_if(self):
+ self.stmt(ast.If(ast.Num(3), [], []), "empty body on If")
+ i = ast.If(ast.Name("x", ast.Store()), [ast.Pass()], [])
+ self.stmt(i, "must have Load context")
+ i = ast.If(ast.Num(3), [ast.Expr(ast.Name("x", ast.Store()))], [])
+ self.stmt(i, "must have Load context")
+ i = ast.If(ast.Num(3), [ast.Pass()],
+ [ast.Expr(ast.Name("x", ast.Store()))])
+ self.stmt(i, "must have Load context")
+
+ def test_with(self):
+ p = ast.Pass()
+ self.stmt(ast.With([], [p]), "empty items on With")
+ i = ast.withitem(ast.Num(3), None)
+ self.stmt(ast.With([i], []), "empty body on With")
+ i = ast.withitem(ast.Name("x", ast.Store()), None)
+ self.stmt(ast.With([i], [p]), "must have Load context")
+ i = ast.withitem(ast.Num(3), ast.Name("x", ast.Load()))
+ self.stmt(ast.With([i], [p]), "must have Store context")
+
+ def test_raise(self):
+ r = ast.Raise(None, ast.Num(3))
+ self.stmt(r, "Raise with cause but no exception")
+ r = ast.Raise(ast.Name("x", ast.Store()), None)
+ self.stmt(r, "must have Load context")
+ r = ast.Raise(ast.Num(4), ast.Name("x", ast.Store()))
+ self.stmt(r, "must have Load context")
+
+ def test_try(self):
+ p = ast.Pass()
+ t = ast.Try([], [], [], [p])
+ self.stmt(t, "empty body on Try")
+ t = ast.Try([ast.Expr(ast.Name("x", ast.Store()))], [], [], [p])
+ self.stmt(t, "must have Load context")
+ t = ast.Try([p], [], [], [])
+ self.stmt(t, "Try has neither except handlers nor finalbody")
+ t = ast.Try([p], [], [p], [p])
+ self.stmt(t, "Try has orelse but no except handlers")
+ t = ast.Try([p], [ast.ExceptHandler(None, "x", [])], [], [])
+ self.stmt(t, "empty body on ExceptHandler")
+ e = [ast.ExceptHandler(ast.Name("x", ast.Store()), "y", [p])]
+ self.stmt(ast.Try([p], e, [], []), "must have Load context")
+ e = [ast.ExceptHandler(None, "x", [p])]
+ t = ast.Try([p], e, [ast.Expr(ast.Name("x", ast.Store()))], [p])
+ self.stmt(t, "must have Load context")
+ t = ast.Try([p], e, [p], [ast.Expr(ast.Name("x", ast.Store()))])
+ self.stmt(t, "must have Load context")
+
+ def test_assert(self):
+ self.stmt(ast.Assert(ast.Name("x", ast.Store()), None),
+ "must have Load context")
+ assrt = ast.Assert(ast.Name("x", ast.Load()),
+ ast.Name("y", ast.Store()))
+ self.stmt(assrt, "must have Load context")
+
+ def test_import(self):
+ self.stmt(ast.Import([]), "empty names on Import")
+
+ def test_importfrom(self):
+ imp = ast.ImportFrom(None, [ast.alias("x", None)], -42)
+ self.stmt(imp, "level less than -1")
+ self.stmt(ast.ImportFrom(None, [], 0), "empty names on ImportFrom")
+
+ def test_global(self):
+ self.stmt(ast.Global([]), "empty names on Global")
+
+ def test_nonlocal(self):
+ self.stmt(ast.Nonlocal([]), "empty names on Nonlocal")
+
+ def test_expr(self):
+ e = ast.Expr(ast.Name("x", ast.Store()))
+ self.stmt(e, "must have Load context")
+
+ def test_boolop(self):
+ b = ast.BoolOp(ast.And(), [])
+ self.expr(b, "less than 2 values")
+ b = ast.BoolOp(ast.And(), [ast.Num(3)])
+ self.expr(b, "less than 2 values")
+ b = ast.BoolOp(ast.And(), [ast.Num(4), None])
+ self.expr(b, "None disallowed")
+ b = ast.BoolOp(ast.And(), [ast.Num(4), ast.Name("x", ast.Store())])
+ self.expr(b, "must have Load context")
+
+ def test_unaryop(self):
+ u = ast.UnaryOp(ast.Not(), ast.Name("x", ast.Store()))
+ self.expr(u, "must have Load context")
+
+ def test_lambda(self):
+ a = ast.arguments([], None, None, [], None, None, [], [])
+ self.expr(ast.Lambda(a, ast.Name("x", ast.Store())),
+ "must have Load context")
+ def fac(args):
+ return ast.Lambda(args, ast.Name("x", ast.Load()))
+ self._check_arguments(fac, self.expr)
+
+ def test_ifexp(self):
+ l = ast.Name("x", ast.Load())
+ s = ast.Name("y", ast.Store())
+ for args in (s, l, l), (l, s, l), (l, l, s):
+ self.expr(ast.IfExp(*args), "must have Load context")
+
+ def test_dict(self):
+ d = ast.Dict([], [ast.Name("x", ast.Load())])
+ self.expr(d, "same number of keys as values")
+ d = ast.Dict([None], [ast.Name("x", ast.Load())])
+ self.expr(d, "None disallowed")
+ d = ast.Dict([ast.Name("x", ast.Load())], [None])
+ self.expr(d, "None disallowed")
+
+ def test_set(self):
+ self.expr(ast.Set([None]), "None disallowed")
+ s = ast.Set([ast.Name("x", ast.Store())])
+ self.expr(s, "must have Load context")
+
+ def _check_comprehension(self, fac):
+ self.expr(fac([]), "comprehension with no generators")
+ g = ast.comprehension(ast.Name("x", ast.Load()),
+ ast.Name("x", ast.Load()), [])
+ self.expr(fac([g]), "must have Store context")
+ g = ast.comprehension(ast.Name("x", ast.Store()),
+ ast.Name("x", ast.Store()), [])
+ self.expr(fac([g]), "must have Load context")
+ x = ast.Name("x", ast.Store())
+ y = ast.Name("y", ast.Load())
+ g = ast.comprehension(x, y, [None])
+ self.expr(fac([g]), "None disallowed")
+ g = ast.comprehension(x, y, [ast.Name("x", ast.Store())])
+ self.expr(fac([g]), "must have Load context")
+
+ def _simple_comp(self, fac):
+ g = ast.comprehension(ast.Name("x", ast.Store()),
+ ast.Name("x", ast.Load()), [])
+ self.expr(fac(ast.Name("x", ast.Store()), [g]),
+ "must have Load context")
+ def wrap(gens):
+ return fac(ast.Name("x", ast.Store()), gens)
+ self._check_comprehension(wrap)
+
+ def test_listcomp(self):
+ self._simple_comp(ast.ListComp)
+
+ def test_setcomp(self):
+ self._simple_comp(ast.SetComp)
+
+ def test_generatorexp(self):
+ self._simple_comp(ast.GeneratorExp)
+
+ def test_dictcomp(self):
+ g = ast.comprehension(ast.Name("y", ast.Store()),
+ ast.Name("p", ast.Load()), [])
+ c = ast.DictComp(ast.Name("x", ast.Store()),
+ ast.Name("y", ast.Load()), [g])
+ self.expr(c, "must have Load context")
+ c = ast.DictComp(ast.Name("x", ast.Load()),
+ ast.Name("y", ast.Store()), [g])
+ self.expr(c, "must have Load context")
+ def factory(comps):
+ k = ast.Name("x", ast.Load())
+ v = ast.Name("y", ast.Load())
+ return ast.DictComp(k, v, comps)
+ self._check_comprehension(factory)
+
+ def test_yield(self):
+ self.expr(ast.Yield(ast.Name("x", ast.Store())), "must have Load")
+ self.expr(ast.YieldFrom(ast.Name("x", ast.Store())), "must have Load")
+
+ def test_compare(self):
+ left = ast.Name("x", ast.Load())
+ comp = ast.Compare(left, [ast.In()], [])
+ self.expr(comp, "no comparators")
+ comp = ast.Compare(left, [ast.In()], [ast.Num(4), ast.Num(5)])
+ self.expr(comp, "different number of comparators and operands")
+ comp = ast.Compare(ast.Num("blah"), [ast.In()], [left])
+ self.expr(comp, "non-numeric", exc=TypeError)
+ comp = ast.Compare(left, [ast.In()], [ast.Num("blah")])
+ self.expr(comp, "non-numeric", exc=TypeError)
+
+ def test_call(self):
+ func = ast.Name("x", ast.Load())
+ args = [ast.Name("y", ast.Load())]
+ keywords = [ast.keyword("w", ast.Name("z", ast.Load()))]
+ stararg = ast.Name("p", ast.Load())
+ kwarg = ast.Name("q", ast.Load())
+ call = ast.Call(ast.Name("x", ast.Store()), args, keywords, stararg,
+ kwarg)
+ self.expr(call, "must have Load context")
+ call = ast.Call(func, [None], keywords, stararg, kwarg)
+ self.expr(call, "None disallowed")
+ bad_keywords = [ast.keyword("w", ast.Name("z", ast.Store()))]
+ call = ast.Call(func, args, bad_keywords, stararg, kwarg)
+ self.expr(call, "must have Load context")
+ call = ast.Call(func, args, keywords, ast.Name("z", ast.Store()), kwarg)
+ self.expr(call, "must have Load context")
+ call = ast.Call(func, args, keywords, stararg,
+ ast.Name("w", ast.Store()))
+ self.expr(call, "must have Load context")
+
+ def test_num(self):
+ class subint(int):
+ pass
+ class subfloat(float):
+ pass
+ class subcomplex(complex):
+ pass
+ for obj in "0", "hello", subint(), subfloat(), subcomplex():
+ self.expr(ast.Num(obj), "non-numeric", exc=TypeError)
+
+ def test_attribute(self):
+ attr = ast.Attribute(ast.Name("x", ast.Store()), "y", ast.Load())
+ self.expr(attr, "must have Load context")
+
+ def test_subscript(self):
+ sub = ast.Subscript(ast.Name("x", ast.Store()), ast.Index(ast.Num(3)),
+ ast.Load())
+ self.expr(sub, "must have Load context")
+ x = ast.Name("x", ast.Load())
+ sub = ast.Subscript(x, ast.Index(ast.Name("y", ast.Store())),
+ ast.Load())
+ self.expr(sub, "must have Load context")
+ s = ast.Name("x", ast.Store())
+ for args in (s, None, None), (None, s, None), (None, None, s):
+ sl = ast.Slice(*args)
+ self.expr(ast.Subscript(x, sl, ast.Load()),
+ "must have Load context")
+ sl = ast.ExtSlice([])
+ self.expr(ast.Subscript(x, sl, ast.Load()), "empty dims on ExtSlice")
+ sl = ast.ExtSlice([ast.Index(s)])
+ self.expr(ast.Subscript(x, sl, ast.Load()), "must have Load context")
+
+ def test_starred(self):
+ left = ast.List([ast.Starred(ast.Name("x", ast.Load()), ast.Store())],
+ ast.Store())
+ assign = ast.Assign([left], ast.Num(4))
+ self.stmt(assign, "must have Store context")
+
+ def _sequence(self, fac):
+ self.expr(fac([None], ast.Load()), "None disallowed")
+ self.expr(fac([ast.Name("x", ast.Store())], ast.Load()),
+ "must have Load context")
+
+ def test_list(self):
+ self._sequence(ast.List)
+
+ def test_tuple(self):
+ self._sequence(ast.Tuple)
+
+ def test_stdlib_validates(self):
+ stdlib = os.path.dirname(ast.__file__)
+ tests = [fn for fn in os.listdir(stdlib) if fn.endswith(".py")]
+ tests.extend(["test/test_grammar.py", "test/test_unpack_ex.py"])
+ for module in tests:
+ fn = os.path.join(stdlib, module)
+ with open(fn, "r", encoding="utf-8") as fp:
+ source = fp.read()
+ mod = ast.parse(source)
+ compile(mod, fn, "exec")
+
+
def test_main():
- support.run_unittest(AST_Tests, ASTHelpers_Test)
+ support.run_unittest(AST_Tests, ASTHelpers_Test, ASTValidatorTests)
def main():
if __name__ != '__main__':
@@ -539,9 +967,11 @@ exec_results = [
('Module', [('For', (1, 0), ('Name', (1, 4), 'v', ('Store',)), ('Name', (1, 9), 'v', ('Load',)), [('Pass', (1, 11))], [])]),
('Module', [('While', (1, 0), ('Name', (1, 6), 'v', ('Load',)), [('Pass', (1, 8))], [])]),
('Module', [('If', (1, 0), ('Name', (1, 3), 'v', ('Load',)), [('Pass', (1, 5))], [])]),
+('Module', [('With', (1, 0), [('withitem', ('Name', (1, 5), 'x', ('Load',)), ('Name', (1, 10), 'y', ('Store',)))], [('Pass', (1, 13))])]),
+('Module', [('With', (1, 0), [('withitem', ('Name', (1, 5), 'x', ('Load',)), ('Name', (1, 10), 'y', ('Store',))), ('withitem', ('Name', (1, 13), 'z', ('Load',)), ('Name', (1, 18), 'q', ('Store',)))], [('Pass', (1, 21))])]),
('Module', [('Raise', (1, 0), ('Call', (1, 6), ('Name', (1, 6), 'Exception', ('Load',)), [('Str', (1, 16), 'string')], [], None, None), None)]),
-('Module', [('TryExcept', (1, 0), [('Pass', (2, 2))], [('ExceptHandler', (3, 0), ('Name', (3, 7), 'Exception', ('Load',)), None, [('Pass', (4, 2))])], [])]),
-('Module', [('TryFinally', (1, 0), [('Pass', (2, 2))], [('Pass', (4, 2))])]),
+('Module', [('Try', (1, 0), [('Pass', (2, 2))], [('ExceptHandler', (3, 0), ('Name', (3, 7), 'Exception', ('Load',)), None, [('Pass', (4, 2))])], [], [])]),
+('Module', [('Try', (1, 0), [('Pass', (2, 2))], [], [], [('Pass', (4, 2))])]),
('Module', [('Assert', (1, 0), ('Name', (1, 7), 'v', ('Load',)), None)]),
('Module', [('Import', (1, 0), [('alias', 'sys', None)])]),
('Module', [('ImportFrom', (1, 0), 'sys', [('alias', 'v', None)], 0)]),
diff --git a/Lib/test/test_asyncore.py b/Lib/test/test_asyncore.py
index 5f55df8..42a2525 100644
--- a/Lib/test/test_asyncore.py
+++ b/Lib/test/test_asyncore.py
@@ -21,6 +21,8 @@ except ImportError:
HOST = support.HOST
+HAS_UNIX_SOCKETS = hasattr(socket, 'AF_UNIX')
+
class dummysocket:
def __init__(self):
self.closed = False
@@ -72,15 +74,16 @@ def capture_server(evt, buf, serv):
pass
else:
n = 200
- while n > 0:
- r, w, e = select.select([conn], [], [])
+ start = time.time()
+ while n > 0 and time.time() - start < 3.0:
+ r, w, e = select.select([conn], [], [], 0.1)
if r:
+ n -= 1
data = conn.recv(10)
# keep everything except for the newline terminator
buf.write(data.replace(b'\n', b''))
if b'\n' in data:
break
- n -= 1
time.sleep(0.01)
conn.close()
@@ -88,6 +91,13 @@ def capture_server(evt, buf, serv):
serv.close()
evt.set()
+def bind_af_aware(sock, addr):
+ """Helper function to bind a socket according to its family."""
+ if HAS_UNIX_SOCKETS and sock.family == socket.AF_UNIX:
+ # Make sure the path doesn't exist.
+ unlink(addr)
+ sock.bind(addr)
+
class HelperFunctionTests(unittest.TestCase):
def test_readwriteexc(self):
@@ -353,7 +363,7 @@ class DispatcherWithSendTests(unittest.TestCase):
@support.reap_threads
def test_send(self):
evt = threading.Event()
- sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock = socket.socket()
sock.settimeout(3)
port = support.bind_port(sock)
@@ -368,7 +378,7 @@ class DispatcherWithSendTests(unittest.TestCase):
data = b"Suppose there isn't a 16-ton weight?"
d = dispatcherwithsend_noread()
- d.create_socket(socket.AF_INET, socket.SOCK_STREAM)
+ d.create_socket()
d.connect((HOST, port))
# give time for socket to connect
@@ -468,22 +478,22 @@ class BaseTestHandler(asyncore.dispatcher):
raise
-class TCPServer(asyncore.dispatcher):
+class BaseServer(asyncore.dispatcher):
"""A server which listens on an address and dispatches the
connection to a handler.
"""
- def __init__(self, handler=BaseTestHandler, host=HOST, port=0):
+ def __init__(self, family, addr, handler=BaseTestHandler):
asyncore.dispatcher.__init__(self)
- self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
+ self.create_socket(family)
self.set_reuse_addr()
- self.bind((host, port))
+ bind_af_aware(self.socket, addr)
self.listen(5)
self.handler = handler
@property
def address(self):
- return self.socket.getsockname()[:2]
+ return self.socket.getsockname()
def handle_accepted(self, sock, addr):
self.handler(sock)
@@ -494,9 +504,9 @@ class TCPServer(asyncore.dispatcher):
class BaseClient(BaseTestHandler):
- def __init__(self, address):
+ def __init__(self, family, address):
BaseTestHandler.__init__(self)
- self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
+ self.create_socket(family)
self.connect(address)
def handle_connect(self):
@@ -526,8 +536,8 @@ class BaseTestAPI(unittest.TestCase):
def handle_connect(self):
self.flag = True
- server = TCPServer()
- client = TestClient(server.address)
+ server = BaseServer(self.family, self.addr)
+ client = TestClient(self.family, server.address)
self.loop_waiting_for_flag(client)
def test_handle_accept(self):
@@ -535,18 +545,18 @@ class BaseTestAPI(unittest.TestCase):
class TestListener(BaseTestHandler):
- def __init__(self):
+ def __init__(self, family, addr):
BaseTestHandler.__init__(self)
- self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
- self.bind((HOST, 0))
+ self.create_socket(family)
+ bind_af_aware(self.socket, addr)
self.listen(5)
- self.address = self.socket.getsockname()[:2]
+ self.address = self.socket.getsockname()
def handle_accept(self):
self.flag = True
- server = TestListener()
- client = BaseClient(server.address)
+ server = TestListener(self.family, self.addr)
+ client = BaseClient(self.family, server.address)
self.loop_waiting_for_flag(server)
def test_handle_accepted(self):
@@ -554,12 +564,12 @@ class BaseTestAPI(unittest.TestCase):
class TestListener(BaseTestHandler):
- def __init__(self):
+ def __init__(self, family, addr):
BaseTestHandler.__init__(self)
- self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
- self.bind((HOST, 0))
+ self.create_socket(family)
+ bind_af_aware(self.socket, addr)
self.listen(5)
- self.address = self.socket.getsockname()[:2]
+ self.address = self.socket.getsockname()
def handle_accept(self):
asyncore.dispatcher.handle_accept(self)
@@ -568,8 +578,8 @@ class BaseTestAPI(unittest.TestCase):
sock.close()
self.flag = True
- server = TestListener()
- client = BaseClient(server.address)
+ server = TestListener(self.family, self.addr)
+ client = BaseClient(self.family, server.address)
self.loop_waiting_for_flag(server)
@@ -585,8 +595,8 @@ class BaseTestAPI(unittest.TestCase):
BaseTestHandler.__init__(self, conn)
self.send(b'x' * 1024)
- server = TCPServer(TestHandler)
- client = TestClient(server.address)
+ server = BaseServer(self.family, self.addr, TestHandler)
+ client = TestClient(self.family, server.address)
self.loop_waiting_for_flag(client)
def test_handle_write(self):
@@ -596,8 +606,8 @@ class BaseTestAPI(unittest.TestCase):
def handle_write(self):
self.flag = True
- server = TCPServer()
- client = TestClient(server.address)
+ server = BaseServer(self.family, self.addr)
+ client = TestClient(self.family, server.address)
self.loop_waiting_for_flag(client)
def test_handle_close(self):
@@ -620,8 +630,40 @@ class BaseTestAPI(unittest.TestCase):
BaseTestHandler.__init__(self, conn)
self.close()
- server = TCPServer(TestHandler)
- client = TestClient(server.address)
+ server = BaseServer(self.family, self.addr, TestHandler)
+ client = TestClient(self.family, server.address)
+ self.loop_waiting_for_flag(client)
+
+ def test_handle_close_after_conn_broken(self):
+ # Check that ECONNRESET/EPIPE is correctly handled (issues #5661 and
+ # #11265).
+
+ data = b'\0' * 128
+
+ class TestClient(BaseClient):
+
+ def handle_write(self):
+ self.send(data)
+
+ def handle_close(self):
+ self.flag = True
+ self.close()
+
+ def handle_expt(self):
+ self.flag = True
+ self.close()
+
+ class TestHandler(BaseTestHandler):
+
+ def handle_read(self):
+ self.recv(len(data))
+ self.close()
+
+ def writable(self):
+ return False
+
+ server = BaseServer(self.family, self.addr, TestHandler)
+ client = TestClient(self.family, server.address)
self.loop_waiting_for_flag(client)
@unittest.skipIf(sys.platform.startswith("sunos"),
@@ -630,9 +672,12 @@ class BaseTestAPI(unittest.TestCase):
# Make sure handle_expt is called on OOB data received.
# Note: this might fail on some platforms as OOB data is
# tenuously supported and rarely used.
+ if HAS_UNIX_SOCKETS and self.family == socket.AF_UNIX:
+ self.skipTest("Not applicable to AF_UNIX sockets.")
class TestClient(BaseClient):
def handle_expt(self):
+ self.socket.recv(1024, socket.MSG_OOB)
self.flag = True
class TestHandler(BaseTestHandler):
@@ -640,8 +685,8 @@ class BaseTestAPI(unittest.TestCase):
BaseTestHandler.__init__(self, conn)
self.socket.send(bytes(chr(244), 'latin-1'), socket.MSG_OOB)
- server = TCPServer(TestHandler)
- client = TestClient(server.address)
+ server = BaseServer(self.family, self.addr, TestHandler)
+ client = TestClient(self.family, server.address)
self.loop_waiting_for_flag(client)
def test_handle_error(self):
@@ -658,13 +703,13 @@ class BaseTestAPI(unittest.TestCase):
else:
raise Exception("exception not raised")
- server = TCPServer()
- client = TestClient(server.address)
+ server = BaseServer(self.family, self.addr)
+ client = TestClient(self.family, server.address)
self.loop_waiting_for_flag(client)
def test_connection_attributes(self):
- server = TCPServer()
- client = BaseClient(server.address)
+ server = BaseServer(self.family, self.addr)
+ client = BaseClient(self.family, server.address)
# we start disconnected
self.assertFalse(server.connected)
@@ -694,25 +739,29 @@ class BaseTestAPI(unittest.TestCase):
def test_create_socket(self):
s = asyncore.dispatcher()
- s.create_socket(socket.AF_INET, socket.SOCK_STREAM)
- self.assertEqual(s.socket.family, socket.AF_INET)
+ s.create_socket(self.family)
+ self.assertEqual(s.socket.family, self.family)
SOCK_NONBLOCK = getattr(socket, 'SOCK_NONBLOCK', 0)
self.assertEqual(s.socket.type, socket.SOCK_STREAM | SOCK_NONBLOCK)
def test_bind(self):
+ if HAS_UNIX_SOCKETS and self.family == socket.AF_UNIX:
+ self.skipTest("Not applicable to AF_UNIX sockets.")
s1 = asyncore.dispatcher()
- s1.create_socket(socket.AF_INET, socket.SOCK_STREAM)
- s1.bind((HOST, 0))
+ s1.create_socket(self.family)
+ s1.bind(self.addr)
s1.listen(5)
port = s1.socket.getsockname()[1]
s2 = asyncore.dispatcher()
- s2.create_socket(socket.AF_INET, socket.SOCK_STREAM)
+ s2.create_socket(self.family)
# EADDRINUSE indicates the socket was correctly bound
- self.assertRaises(socket.error, s2.bind, (HOST, port))
+ self.assertRaises(socket.error, s2.bind, (self.addr[0], port))
def test_set_reuse_addr(self):
- sock = socket.socket()
+ if HAS_UNIX_SOCKETS and self.family == socket.AF_UNIX:
+ self.skipTest("Not applicable to AF_UNIX sockets.")
+ sock = socket.socket(self.family)
try:
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
except socket.error:
@@ -720,11 +769,11 @@ class BaseTestAPI(unittest.TestCase):
else:
# if SO_REUSEADDR succeeded for sock we expect asyncore
# to do the same
- s = asyncore.dispatcher(socket.socket())
+ s = asyncore.dispatcher(socket.socket(self.family))
self.assertFalse(s.socket.getsockopt(socket.SOL_SOCKET,
socket.SO_REUSEADDR))
s.socket.close()
- s.create_socket(socket.AF_INET, socket.SOCK_STREAM)
+ s.create_socket(self.family)
s.set_reuse_addr()
self.assertTrue(s.socket.getsockopt(socket.SOL_SOCKET,
socket.SO_REUSEADDR))
@@ -735,12 +784,14 @@ class BaseTestAPI(unittest.TestCase):
@support.reap_threads
def test_quick_connect(self):
# see: http://bugs.python.org/issue10340
- server = TCPServer()
- t = threading.Thread(target=lambda: asyncore.loop(timeout=0.1, count=500))
- t.start()
+ if self.family in (socket.AF_INET, getattr(socket, "AF_INET6", object())):
+ server = BaseServer(self.family, self.addr)
+ t = threading.Thread(target=lambda: asyncore.loop(timeout=0.1,
+ count=500))
+ t.start()
+
- for x in range(20):
- s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ s = socket.socket(self.family, socket.SOCK_STREAM)
s.settimeout(.2)
s.setsockopt(socket.SOL_SOCKET, socket.SO_LINGER,
struct.pack('ii', 1, 0))
@@ -751,18 +802,52 @@ class BaseTestAPI(unittest.TestCase):
finally:
s.close()
-class TestAPI_UseSelect(BaseTestAPI):
+class TestAPI_UseIPv4Sockets(BaseTestAPI):
+ family = socket.AF_INET
+ addr = (HOST, 0)
+
+@unittest.skipUnless(support.IPV6_ENABLED, 'IPv6 support required')
+class TestAPI_UseIPv6Sockets(BaseTestAPI):
+ family = socket.AF_INET6
+ addr = ('::1', 0)
+
+@unittest.skipUnless(HAS_UNIX_SOCKETS, 'Unix sockets required')
+class TestAPI_UseUnixSockets(BaseTestAPI):
+ if HAS_UNIX_SOCKETS:
+ family = socket.AF_UNIX
+ addr = support.TESTFN
+
+ def tearDown(self):
+ unlink(self.addr)
+ BaseTestAPI.tearDown(self)
+
+class TestAPI_UseIPv4Select(TestAPI_UseIPv4Sockets):
use_poll = False
@unittest.skipUnless(hasattr(select, 'poll'), 'select.poll required')
-class TestAPI_UsePoll(BaseTestAPI):
+class TestAPI_UseIPv4Poll(TestAPI_UseIPv4Sockets):
use_poll = True
+class TestAPI_UseIPv6Select(TestAPI_UseIPv6Sockets):
+ use_poll = False
+
+@unittest.skipUnless(hasattr(select, 'poll'), 'select.poll required')
+class TestAPI_UseIPv6Poll(TestAPI_UseIPv6Sockets):
+ use_poll = True
+
+class TestAPI_UseUnixSocketsSelect(TestAPI_UseUnixSockets):
+ use_poll = False
+
+@unittest.skipUnless(hasattr(select, 'poll'), 'select.poll required')
+class TestAPI_UseUnixSocketsPoll(TestAPI_UseUnixSockets):
+ use_poll = True
def test_main():
tests = [HelperFunctionTests, DispatcherTests, DispatcherWithSendTests,
- DispatcherWithSendTests_UsePoll, TestAPI_UseSelect,
- TestAPI_UsePoll, FileWrapperTest]
+ DispatcherWithSendTests_UsePoll, FileWrapperTest,
+ TestAPI_UseIPv4Select, TestAPI_UseIPv4Poll, TestAPI_UseIPv6Select,
+ TestAPI_UseIPv6Poll, TestAPI_UseUnixSocketsSelect,
+ TestAPI_UseUnixSocketsPoll]
run_unittest(*tests)
if __name__ == "__main__":
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index ca94504..2569476 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -103,44 +103,53 @@ class BaseXYTestCase(unittest.TestCase):
def test_b64decode(self):
eq = self.assertEqual
- eq(base64.b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org")
- eq(base64.b64decode(b'AA=='), b'\x00')
- eq(base64.b64decode(b"YQ=="), b"a")
- eq(base64.b64decode(b"YWI="), b"ab")
- eq(base64.b64decode(b"YWJj"), b"abc")
- eq(base64.b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
- b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
- b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
- b"abcdefghijklmnopqrstuvwxyz"
- b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- b"0123456789!@#0^&*();:<>,. []{}")
- eq(base64.b64decode(b''), b'')
+
+ tests = {b"d3d3LnB5dGhvbi5vcmc=": b"www.python.org",
+ b'AA==': b'\x00',
+ b"YQ==": b"a",
+ b"YWI=": b"ab",
+ b"YWJj": b"abc",
+ b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
+ b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
+ b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==":
+
+ b"abcdefghijklmnopqrstuvwxyz"
+ b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ b"0123456789!@#0^&*();:<>,. []{}",
+ b'': b'',
+ }
+ for data, res in tests.items():
+ eq(base64.b64decode(data), res)
+ eq(base64.b64decode(data.decode('ascii')), res)
+
# Test with arbitrary alternative characters
- eq(base64.b64decode(b'01a*b$cd', altchars=b'*$'), b'\xd3V\xbeo\xf7\x1d')
- # Check if passing a str object raises an error
- self.assertRaises(TypeError, base64.b64decode, "")
- self.assertRaises(TypeError, base64.b64decode, b"", altchars="")
+ tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d',
+ }
+ for (data, altchars), res in tests_altchars.items():
+ data_str = data.decode('ascii')
+ altchars_str = altchars.decode('ascii')
+
+ eq(base64.b64decode(data, altchars=altchars), res)
+ eq(base64.b64decode(data_str, altchars=altchars), res)
+ eq(base64.b64decode(data, altchars=altchars_str), res)
+ eq(base64.b64decode(data_str, altchars=altchars_str), res)
+
# Test standard alphabet
- eq(base64.standard_b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org")
- eq(base64.standard_b64decode(b"YQ=="), b"a")
- eq(base64.standard_b64decode(b"YWI="), b"ab")
- eq(base64.standard_b64decode(b"YWJj"), b"abc")
- eq(base64.standard_b64decode(b""), b"")
- eq(base64.standard_b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
- b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
- b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
- b"abcdefghijklmnopqrstuvwxyz"
- b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- b"0123456789!@#0^&*();:<>,. []{}")
- # Check if passing a str object raises an error
- self.assertRaises(TypeError, base64.standard_b64decode, "")
- self.assertRaises(TypeError, base64.standard_b64decode, b"", altchars="")
+ for data, res in tests.items():
+ eq(base64.standard_b64decode(data), res)
+ eq(base64.standard_b64decode(data.decode('ascii')), res)
+
# Test with 'URL safe' alternative characters
- eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d')
- self.assertRaises(TypeError, base64.urlsafe_b64decode, "")
+ tests_urlsafe = {b'01a-b_cd': b'\xd3V\xbeo\xf7\x1d',
+ b'': b'',
+ }
+ for data, res in tests_urlsafe.items():
+ eq(base64.urlsafe_b64decode(data), res)
+ eq(base64.urlsafe_b64decode(data.decode('ascii')), res)
def test_b64decode_padding_error(self):
self.assertRaises(binascii.Error, base64.b64decode, b'abc')
+ self.assertRaises(binascii.Error, base64.b64decode, 'abc')
def test_b64decode_invalid_chars(self):
# issue 1466065: Test some invalid characters.
@@ -155,8 +164,11 @@ class BaseXYTestCase(unittest.TestCase):
(b'YWJj\nYWI=', b'abcab'))
for bstr, res in tests:
self.assertEqual(base64.b64decode(bstr), res)
+ self.assertEqual(base64.b64decode(bstr.decode('ascii')), res)
with self.assertRaises(binascii.Error):
base64.b64decode(bstr, validate=True)
+ with self.assertRaises(binascii.Error):
+ base64.b64decode(bstr.decode('ascii'), validate=True)
def test_b32encode(self):
eq = self.assertEqual
@@ -171,40 +183,63 @@ class BaseXYTestCase(unittest.TestCase):
def test_b32decode(self):
eq = self.assertEqual
- eq(base64.b32decode(b''), b'')
- eq(base64.b32decode(b'AA======'), b'\x00')
- eq(base64.b32decode(b'ME======'), b'a')
- eq(base64.b32decode(b'MFRA===='), b'ab')
- eq(base64.b32decode(b'MFRGG==='), b'abc')
- eq(base64.b32decode(b'MFRGGZA='), b'abcd')
- eq(base64.b32decode(b'MFRGGZDF'), b'abcde')
- self.assertRaises(TypeError, base64.b32decode, "")
+ tests = {b'': b'',
+ b'AA======': b'\x00',
+ b'ME======': b'a',
+ b'MFRA====': b'ab',
+ b'MFRGG===': b'abc',
+ b'MFRGGZA=': b'abcd',
+ b'MFRGGZDF': b'abcde',
+ }
+ for data, res in tests.items():
+ eq(base64.b32decode(data), res)
+ eq(base64.b32decode(data.decode('ascii')), res)
def test_b32decode_casefold(self):
eq = self.assertEqual
- eq(base64.b32decode(b'', True), b'')
- eq(base64.b32decode(b'ME======', True), b'a')
- eq(base64.b32decode(b'MFRA====', True), b'ab')
- eq(base64.b32decode(b'MFRGG===', True), b'abc')
- eq(base64.b32decode(b'MFRGGZA=', True), b'abcd')
- eq(base64.b32decode(b'MFRGGZDF', True), b'abcde')
- # Lower cases
- eq(base64.b32decode(b'me======', True), b'a')
- eq(base64.b32decode(b'mfra====', True), b'ab')
- eq(base64.b32decode(b'mfrgg===', True), b'abc')
- eq(base64.b32decode(b'mfrggza=', True), b'abcd')
- eq(base64.b32decode(b'mfrggzdf', True), b'abcde')
- # Expected exceptions
+ tests = {b'': b'',
+ b'ME======': b'a',
+ b'MFRA====': b'ab',
+ b'MFRGG===': b'abc',
+ b'MFRGGZA=': b'abcd',
+ b'MFRGGZDF': b'abcde',
+ # Lower cases
+ b'me======': b'a',
+ b'mfra====': b'ab',
+ b'mfrgg===': b'abc',
+ b'mfrggza=': b'abcd',
+ b'mfrggzdf': b'abcde',
+ }
+
+ for data, res in tests.items():
+ eq(base64.b32decode(data, True), res)
+ eq(base64.b32decode(data.decode('ascii'), True), res)
+
self.assertRaises(TypeError, base64.b32decode, b'me======')
+ self.assertRaises(TypeError, base64.b32decode, 'me======')
+
# Mapping zero and one
eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe')
- eq(base64.b32decode(b'M1023456', map01=b'L'), b'b\xdd\xad\xf3\xbe')
- eq(base64.b32decode(b'M1023456', map01=b'I'), b'b\x1d\xad\xf3\xbe')
- self.assertRaises(TypeError, base64.b32decode, b"", map01="")
+ eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe')
+
+ map_tests = {(b'M1023456', b'L'): b'b\xdd\xad\xf3\xbe',
+ (b'M1023456', b'I'): b'b\x1d\xad\xf3\xbe',
+ }
+ for (data, map01), res in map_tests.items():
+ data_str = data.decode('ascii')
+ map01_str = map01.decode('ascii')
+
+ eq(base64.b32decode(data, map01=map01), res)
+ eq(base64.b32decode(data_str, map01=map01), res)
+ eq(base64.b32decode(data, map01=map01_str), res)
+ eq(base64.b32decode(data_str, map01=map01_str), res)
def test_b32decode_error(self):
- self.assertRaises(binascii.Error, base64.b32decode, b'abc')
- self.assertRaises(binascii.Error, base64.b32decode, b'ABCDEF==')
+ for data in [b'abc', b'ABCDEF==']:
+ with self.assertRaises(binascii.Error):
+ base64.b32decode(data)
+ with self.assertRaises(binascii.Error):
+ base64.b32decode(data.decode('ascii'))
def test_b16encode(self):
eq = self.assertEqual
@@ -215,12 +250,24 @@ class BaseXYTestCase(unittest.TestCase):
def test_b16decode(self):
eq = self.assertEqual
eq(base64.b16decode(b'0102ABCDEF'), b'\x01\x02\xab\xcd\xef')
+ eq(base64.b16decode('0102ABCDEF'), b'\x01\x02\xab\xcd\xef')
eq(base64.b16decode(b'00'), b'\x00')
+ eq(base64.b16decode('00'), b'\x00')
# Lower case is not allowed without a flag
self.assertRaises(binascii.Error, base64.b16decode, b'0102abcdef')
+ self.assertRaises(binascii.Error, base64.b16decode, '0102abcdef')
# Case fold
eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef')
- self.assertRaises(TypeError, base64.b16decode, "")
+ eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef')
+
+ def test_decode_nonascii_str(self):
+ decode_funcs = (base64.b64decode,
+ base64.standard_b64decode,
+ base64.urlsafe_b64decode,
+ base64.b32decode,
+ base64.b16decode)
+ for f in decode_funcs:
+ self.assertRaises(ValueError, f, 'with non-ascii \xcb')
def test_ErrorHeritage(self):
self.assertTrue(issubclass(binascii.Error, ValueError))
diff --git a/Lib/test/test_bigmem.py b/Lib/test/test_bigmem.py
index f3c6ebb..0e54595 100644
--- a/Lib/test/test_bigmem.py
+++ b/Lib/test/test_bigmem.py
@@ -1,3 +1,13 @@
+"""Bigmem tests - tests for the 32-bit boundary in containers.
+
+These tests try to exercise the 32-bit boundary that is sometimes, if
+rarely, exceeded in practice, but almost never tested. They are really only
+meaningful on 64-bit builds on machines with a *lot* of memory, but the
+tests are always run, usually with very low memory limits to make sure the
+tests themselves don't suffer from bitrot. To run them for real, pass a
+high memory limit to regrtest, with the -M option.
+"""
+
from test import support
from test.support import bigmemtest, _1G, _2G, _4G
@@ -6,20 +16,35 @@ import operator
import sys
import functools
+# These tests all use one of the bigmemtest decorators to indicate how much
+# memory they use and how much memory they need to be even meaningful. The
+# decorators take two arguments: a 'memuse' indicator declaring
+# (approximate) bytes per size-unit the test will use (at peak usage), and a
+# 'minsize' indicator declaring a minimum *useful* size. A test that
+# allocates a bytestring to test various operations near the end will have a
+# minsize of at least 2Gb (or it wouldn't reach the 32-bit limit, so the
+# test wouldn't be very useful) and a memuse of 1 (one byte per size-unit,
+# if it allocates only one big string at a time.)
+#
+# When run with a memory limit set, both decorators skip tests that need
+# more memory than available to be meaningful. The precisionbigmemtest will
+# always pass minsize as size, even if there is much more memory available.
+# The bigmemtest decorator will scale size upward to fill available memory.
+#
# Bigmem testing houserules:
#
# - Try not to allocate too many large objects. It's okay to rely on
-# refcounting semantics, but don't forget that 's = create_largestring()'
+# refcounting semantics, and don't forget that 's = create_largestring()'
# doesn't release the old 's' (if it exists) until well after its new
# value has been created. Use 'del s' before the create_largestring call.
#
-# - Do *not* compare large objects using assertEqual or similar. It's a
-# lengthy operation and the errormessage will be utterly useless due to
-# its size. To make sure whether a result has the right contents, better
-# to use the strip or count methods, or compare meaningful slices.
+# - Do *not* compare large objects using assertEqual, assertIn or similar.
+# It's a lengthy operation and the errormessage will be utterly useless
+# due to its size. To make sure whether a result has the right contents,
+# better to use the strip or count methods, or compare meaningful slices.
#
# - Don't forget to test for large indices, offsets and results and such,
-# in addition to large sizes.
+# in addition to large sizes. Anything that probes the 32-bit boundary.
#
# - When repeating an object (say, a substring, or a small list) to create
# a large object, make the subobject of a length that is not a power of
@@ -37,13 +62,14 @@ import functools
# fail as well. I do not know whether it is due to memory fragmentation
# issues, or other specifics of the platform malloc() routine.
-character_size = 4 if sys.maxunicode > 0xFFFF else 2
+ascii_char_size = 1
+ucs2_char_size = 2
+ucs4_char_size = 4
class BaseStrTest:
- @bigmemtest(size=_2G, memuse=2)
- def test_capitalize(self, size):
+ def _test_capitalize(self, size):
_ = self.from_latin1
SUBSTR = self.from_latin1(' abc def ghi')
s = _('-') * size + SUBSTR
@@ -92,7 +118,7 @@ class BaseStrTest:
_ = self.from_latin1
s = _('-') * size
tabsize = 8
- self.assertEqual(s.expandtabs(), s)
+ self.assertTrue(s.expandtabs() == s)
del s
slen, remainder = divmod(size, tabsize)
s = _(' \t') * slen
@@ -347,7 +373,7 @@ class BaseStrTest:
# suffer for the list size. (Otherwise, it'd cost another 48 times
# size in bytes!) Nevertheless, a list of size takes
# 8*size bytes.
- @bigmemtest(size=_2G + 5, memuse=10)
+ @bigmemtest(size=_2G + 5, memuse=2 * ascii_char_size + 8)
def test_split_large(self, size):
_ = self.from_latin1
s = _(' a') * size + _(' ')
@@ -366,9 +392,9 @@ class BaseStrTest:
# take up an inordinate amount of memory
chunksize = int(size ** 0.5 + 2) // 2
SUBSTR = _(' ') * chunksize + _('\n') + _(' ') * chunksize + _('\r\n')
- s = SUBSTR * chunksize
+ s = SUBSTR * (chunksize * 2)
l = s.splitlines()
- self.assertEqual(len(l), chunksize * 2)
+ self.assertEqual(len(l), chunksize * 4)
expected = _(' ') * chunksize
for item in l:
self.assertEqual(item, expected)
@@ -394,8 +420,7 @@ class BaseStrTest:
self.assertEqual(len(s), size)
self.assertEqual(s.strip(), SUBSTR.strip())
- @bigmemtest(size=_2G, memuse=2)
- def test_swapcase(self, size):
+ def _test_swapcase(self, size):
_ = self.from_latin1
SUBSTR = _("aBcDeFG12.'\xa9\x00")
sublen = len(SUBSTR)
@@ -406,8 +431,7 @@ class BaseStrTest:
self.assertEqual(s[:sublen * 3], SUBSTR.swapcase() * 3)
self.assertEqual(s[-sublen * 3:], SUBSTR.swapcase() * 3)
- @bigmemtest(size=_2G, memuse=2)
- def test_title(self, size):
+ def _test_title(self, size):
_ = self.from_latin1
SUBSTR = _('SpaaHAaaAaham')
s = SUBSTR * (size // len(SUBSTR) + 2)
@@ -419,14 +443,7 @@ class BaseStrTest:
def test_translate(self, size):
_ = self.from_latin1
SUBSTR = _('aZz.z.Aaz.')
- if isinstance(SUBSTR, str):
- trans = {
- ord(_('.')): _('-'),
- ord(_('a')): _('!'),
- ord(_('Z')): _('$'),
- }
- else:
- trans = bytes.maketrans(b'.aZ', b'-!$')
+ trans = bytes.maketrans(b'.aZ', b'-!$')
sublen = len(SUBSTR)
repeats = size // sublen + 2
s = SUBSTR * repeats
@@ -519,19 +536,19 @@ class BaseStrTest:
edge = _('-') * (size // 2)
s = _('').join([edge, SUBSTR, edge])
del edge
- self.assertIn(SUBSTR, s)
- self.assertNotIn(SUBSTR * 2, s)
- self.assertIn(_('-'), s)
- self.assertNotIn(_('a'), s)
+ self.assertTrue(SUBSTR in s)
+ self.assertFalse(SUBSTR * 2 in s)
+ self.assertTrue(_('-') in s)
+ self.assertFalse(_('a') in s)
s += _('a')
- self.assertIn(_('a'), s)
+ self.assertTrue(_('a') in s)
@bigmemtest(size=_2G + 10, memuse=2)
def test_compare(self, size):
_ = self.from_latin1
s1 = _('-') * size
s2 = _('-') * size
- self.assertEqual(s1, s2)
+ self.assertTrue(s1 == s2)
del s2
s2 = s1 + _('a')
self.assertFalse(s1 == s2)
@@ -552,7 +569,7 @@ class BaseStrTest:
h1 = hash(s)
del s
s = _('\x00') * (size + 1)
- self.assertFalse(h1 == hash(s))
+ self.assertNotEqual(h1, hash(s))
class StrTest(unittest.TestCase, BaseStrTest):
@@ -563,7 +580,6 @@ class StrTest(unittest.TestCase, BaseStrTest):
def basic_encode_test(self, size, enc, c='.', expectedsize=None):
if expectedsize is None:
expectedsize = size
-
try:
s = c * size
self.assertEqual(len(s.encode(enc)), expectedsize)
@@ -582,48 +598,64 @@ class StrTest(unittest.TestCase, BaseStrTest):
memuse = meth.memuse
except AttributeError:
continue
- meth.memuse = character_size * memuse
+ meth.memuse = ascii_char_size * memuse
self._adjusted[name] = memuse
def tearDown(self):
for name, memuse in self._adjusted.items():
getattr(type(self), name).memuse = memuse
- # the utf8 encoder preallocates big time (4x the number of characters)
- @bigmemtest(size=_2G + 2, memuse=character_size + 4)
+ @bigmemtest(size=_2G, memuse=ucs4_char_size * 3)
+ def test_capitalize(self, size):
+ self._test_capitalize(size)
+
+ @bigmemtest(size=_2G, memuse=ucs4_char_size * 3)
+ def test_title(self, size):
+ self._test_title(size)
+
+ @bigmemtest(size=_2G, memuse=ucs4_char_size * 3)
+ def test_swapcase(self, size):
+ self._test_swapcase(size)
+
+ # Many codecs convert to the legacy representation first, explaining
+ # why we add 'ucs4_char_size' to the 'memuse' below.
+
+ @bigmemtest(size=_2G + 2, memuse=ascii_char_size + 1)
def test_encode(self, size):
return self.basic_encode_test(size, 'utf-8')
- @bigmemtest(size=_4G // 6 + 2, memuse=character_size + 1)
+ @bigmemtest(size=_4G // 6 + 2, memuse=ascii_char_size + ucs4_char_size + 1)
def test_encode_raw_unicode_escape(self, size):
try:
return self.basic_encode_test(size, 'raw_unicode_escape')
except MemoryError:
pass # acceptable on 32-bit
- @bigmemtest(size=_4G // 5 + 70, memuse=character_size + 1)
+ @bigmemtest(size=_4G // 5 + 70, memuse=ascii_char_size + ucs4_char_size + 1)
def test_encode_utf7(self, size):
try:
return self.basic_encode_test(size, 'utf7')
except MemoryError:
pass # acceptable on 32-bit
- @bigmemtest(size=_4G // 4 + 5, memuse=character_size + 4)
+ @bigmemtest(size=_4G // 4 + 5, memuse=ascii_char_size + ucs4_char_size + 4)
def test_encode_utf32(self, size):
try:
- return self.basic_encode_test(size, 'utf32', expectedsize=4*size+4)
+ return self.basic_encode_test(size, 'utf32', expectedsize=4 * size + 4)
except MemoryError:
pass # acceptable on 32-bit
- @bigmemtest(size=_2G - 1, memuse=character_size + 1)
+ @bigmemtest(size=_2G - 1, memuse=ascii_char_size + 1)
def test_encode_ascii(self, size):
return self.basic_encode_test(size, 'ascii', c='A')
- @bigmemtest(size=_2G + 10, memuse=character_size * 2)
+ # str % (...) uses a Py_UCS4 intermediate representation
+
+ @bigmemtest(size=_2G + 10, memuse=ascii_char_size * 2 + ucs4_char_size)
def test_format(self, size):
s = '-' * size
sf = '%s' % (s,)
- self.assertEqual(s, sf)
+ self.assertTrue(s == sf)
del sf
sf = '..%s..' % (s,)
self.assertEqual(len(sf), len(s) + 4)
@@ -640,7 +672,7 @@ class StrTest(unittest.TestCase, BaseStrTest):
self.assertEqual(s.count('.'), 3)
self.assertEqual(s.count('-'), size * 2)
- @bigmemtest(size=_2G + 10, memuse=character_size * 2)
+ @bigmemtest(size=_2G + 10, memuse=ascii_char_size * 2)
def test_repr_small(self, size):
s = '-' * size
s = repr(s)
@@ -661,7 +693,7 @@ class StrTest(unittest.TestCase, BaseStrTest):
self.assertEqual(s.count('\\'), size)
self.assertEqual(s.count('0'), size * 2)
- @bigmemtest(size=_2G + 10, memuse=character_size * 5)
+ @bigmemtest(size=_2G + 10, memuse=ascii_char_size * 5)
def test_repr_large(self, size):
s = '\x00' * size
s = repr(s)
@@ -671,7 +703,13 @@ class StrTest(unittest.TestCase, BaseStrTest):
self.assertEqual(s.count('\\'), size)
self.assertEqual(s.count('0'), size * 2)
- @bigmemtest(size=_2G // 5 + 1, memuse=character_size * 7)
+ # ascii() calls encode('ascii', 'backslashreplace'), which itself
+ # creates a temporary Py_UNICODE representation in addition to the
+ # original (Py_UCS2) one
+ # There's also some overallocation when resizing the ascii() result
+ # that isn't taken into account here.
+ @bigmemtest(size=_2G // 5 + 1, memuse=ucs2_char_size +
+ ucs4_char_size + ascii_char_size * 6)
def test_unicode_repr(self, size):
# Use an assigned, but not printable code point.
# It is in the range of the low surrogates \uDC00-\uDFFF.
@@ -686,9 +724,7 @@ class StrTest(unittest.TestCase, BaseStrTest):
finally:
r = s = None
- # The character takes 4 bytes even in UCS-2 builds because it will
- # be decomposed into surrogates.
- @bigmemtest(size=_2G // 5 + 1, memuse=4 + character_size * 9)
+ @bigmemtest(size=_2G // 5 + 1, memuse=ucs4_char_size * 2 + ascii_char_size * 10)
def test_unicode_repr_wide(self, size):
char = "\U0001DCBA"
s = char * size
@@ -701,39 +737,76 @@ class StrTest(unittest.TestCase, BaseStrTest):
finally:
r = s = None
- @bigmemtest(size=_4G // 5, memuse=character_size * (6 + 1))
- def _test_unicode_repr_overflow(self, size):
- # XXX not sure what this test is about
- char = "\uDCBA"
- s = char * size
- try:
- r = repr(s)
- self.assertTrue(s == eval(r))
- finally:
- r = s = None
+ # The original test_translate is overridden here, so as to get the
+ # correct size estimate: str.translate() uses an intermediate Py_UCS4
+ # representation.
+
+ @bigmemtest(size=_2G, memuse=ascii_char_size * 2 + ucs4_char_size)
+ def test_translate(self, size):
+ _ = self.from_latin1
+ SUBSTR = _('aZz.z.Aaz.')
+ trans = {
+ ord(_('.')): _('-'),
+ ord(_('a')): _('!'),
+ ord(_('Z')): _('$'),
+ }
+ sublen = len(SUBSTR)
+ repeats = size // sublen + 2
+ s = SUBSTR * repeats
+ s = s.translate(trans)
+ self.assertEqual(len(s), repeats * sublen)
+ self.assertEqual(s[:sublen], SUBSTR.translate(trans))
+ self.assertEqual(s[-sublen:], SUBSTR.translate(trans))
+ self.assertEqual(s.count(_('.')), 0)
+ self.assertEqual(s.count(_('!')), repeats * 2)
+ self.assertEqual(s.count(_('z')), repeats * 3)
class BytesTest(unittest.TestCase, BaseStrTest):
def from_latin1(self, s):
- return s.encode("latin1")
+ return s.encode("latin-1")
- @bigmemtest(size=_2G + 2, memuse=1 + character_size)
+ @bigmemtest(size=_2G + 2, memuse=1 + ascii_char_size)
def test_decode(self, size):
s = self.from_latin1('.') * size
self.assertEqual(len(s.decode('utf-8')), size)
+ @bigmemtest(size=_2G, memuse=2)
+ def test_capitalize(self, size):
+ self._test_capitalize(size)
+
+ @bigmemtest(size=_2G, memuse=2)
+ def test_title(self, size):
+ self._test_title(size)
+
+ @bigmemtest(size=_2G, memuse=2)
+ def test_swapcase(self, size):
+ self._test_swapcase(size)
+
class BytearrayTest(unittest.TestCase, BaseStrTest):
def from_latin1(self, s):
- return bytearray(s.encode("latin1"))
+ return bytearray(s.encode("latin-1"))
- @bigmemtest(size=_2G + 2, memuse=1 + character_size)
+ @bigmemtest(size=_2G + 2, memuse=1 + ascii_char_size)
def test_decode(self, size):
s = self.from_latin1('.') * size
self.assertEqual(len(s.decode('utf-8')), size)
+ @bigmemtest(size=_2G, memuse=2)
+ def test_capitalize(self, size):
+ self._test_capitalize(size)
+
+ @bigmemtest(size=_2G, memuse=2)
+ def test_title(self, size):
+ self._test_title(size)
+
+ @bigmemtest(size=_2G, memuse=2)
+ def test_swapcase(self, size):
+ self._test_swapcase(size)
+
test_hash = None
test_split_large = None
@@ -752,7 +825,7 @@ class TupleTest(unittest.TestCase):
def test_compare(self, size):
t1 = ('',) * size
t2 = ('',) * size
- self.assertEqual(t1, t2)
+ self.assertTrue(t1 == t2)
del t2
t2 = ('',) * (size + 1)
self.assertFalse(t1 == t2)
@@ -783,9 +856,9 @@ class TupleTest(unittest.TestCase):
def test_contains(self, size):
t = (1, 2, 3, 4, 5) * size
self.assertEqual(len(t), size * 5)
- self.assertIn(5, t)
- self.assertNotIn((1, 2, 3, 4, 5), t)
- self.assertNotIn(0, t)
+ self.assertTrue(5 in t)
+ self.assertFalse((1, 2, 3, 4, 5) in t)
+ self.assertFalse(0 in t)
@bigmemtest(size=_2G + 10, memuse=8)
def test_hash(self, size):
@@ -869,11 +942,11 @@ class TupleTest(unittest.TestCase):
self.assertEqual(s[-5:], '0, 0)')
self.assertEqual(s.count('0'), size)
- @bigmemtest(size=_2G // 3 + 2, memuse=8 + 3 * character_size)
+ @bigmemtest(size=_2G // 3 + 2, memuse=8 + 3 * ascii_char_size)
def test_repr_small(self, size):
return self.basic_test_repr(size)
- @bigmemtest(size=_2G + 2, memuse=8 + 3 * character_size)
+ @bigmemtest(size=_2G + 2, memuse=8 + 3 * ascii_char_size)
def test_repr_large(self, size):
return self.basic_test_repr(size)
@@ -888,7 +961,7 @@ class ListTest(unittest.TestCase):
def test_compare(self, size):
l1 = [''] * size
l2 = [''] * size
- self.assertEqual(l1, l2)
+ self.assertTrue(l1 == l2)
del l2
l2 = [''] * (size + 1)
self.assertFalse(l1 == l2)
@@ -934,9 +1007,9 @@ class ListTest(unittest.TestCase):
def test_contains(self, size):
l = [1, 2, 3, 4, 5] * size
self.assertEqual(len(l), size * 5)
- self.assertIn(5, l)
- self.assertNotIn([1, 2, 3, 4, 5], l)
- self.assertNotIn(0, l)
+ self.assertTrue(5 in l)
+ self.assertFalse([1, 2, 3, 4, 5] in l)
+ self.assertFalse(0 in l)
@bigmemtest(size=_2G + 10, memuse=8)
def test_hash(self, size):
@@ -1044,11 +1117,11 @@ class ListTest(unittest.TestCase):
self.assertEqual(s[-5:], '0, 0]')
self.assertEqual(s.count('0'), size)
- @bigmemtest(size=_2G // 3 + 2, memuse=8 + 3 * character_size)
+ @bigmemtest(size=_2G // 3 + 2, memuse=8 + 3 * ascii_char_size)
def test_repr_small(self, size):
return self.basic_test_repr(size)
- @bigmemtest(size=_2G + 2, memuse=8 + 3 * character_size)
+ @bigmemtest(size=_2G + 2, memuse=8 + 3 * ascii_char_size)
def test_repr_large(self, size):
return self.basic_test_repr(size)
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index 1e9e888..04d8f9d 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -208,9 +208,9 @@ class BinASCIITest(unittest.TestCase):
except Exception as err:
self.fail("{}({!r}) raises {!r}".format(func, empty, err))
- def test_unicode_strings(self):
- # Unicode strings are not accepted.
- for func in all_functions:
+ def test_unicode_b2a(self):
+ # Unicode strings are not accepted by b2a_* functions.
+ for func in set(all_functions) - set(a2b_functions) | {'rledecode_hqx'}:
try:
self.assertRaises(TypeError, getattr(binascii, func), "test")
except Exception as err:
@@ -218,6 +218,34 @@ class BinASCIITest(unittest.TestCase):
# crc_hqx needs 2 arguments
self.assertRaises(TypeError, binascii.crc_hqx, "test", 0)
+ def test_unicode_a2b(self):
+ # Unicode strings are accepted by a2b_* functions.
+ MAX_ALL = 45
+ raw = self.rawdata[:MAX_ALL]
+ for fa, fb in zip(a2b_functions, b2a_functions):
+ if fa == 'rledecode_hqx':
+ # Takes non-ASCII data
+ continue
+ a2b = getattr(binascii, fa)
+ b2a = getattr(binascii, fb)
+ try:
+ a = b2a(self.type2test(raw))
+ binary_res = a2b(a)
+ a = a.decode('ascii')
+ res = a2b(a)
+ except Exception as err:
+ self.fail("{}/{} conversion raises {!r}".format(fb, fa, err))
+ if fb == 'b2a_hqx':
+ # a2b_hqx (the counterpart of b2a_hqx) returns a tuple
+ res, _ = res
+ binary_res, _ = binary_res
+ self.assertEqual(res, raw, "{}/{} conversion: "
+ "{!r} != {!r}".format(fb, fa, res, raw))
+ self.assertEqual(res, binary_res)
+ self.assertIsInstance(res, bytes)
+ # non-ASCII string
+ self.assertRaises(ValueError, a2b, "\x80")
+
class ArrayBinASCIITest(BinASCIITest):
def type2test(self, s):
diff --git a/Lib/test/test_bool.py b/Lib/test/test_bool.py
index b296870..4bab28b 100644
--- a/Lib/test/test_bool.py
+++ b/Lib/test/test_bool.py
@@ -330,6 +330,16 @@ class BoolTest(unittest.TestCase):
except (Exception) as e_len:
self.assertEqual(str(e_bool), str(e_len))
+ def test_real_and_imag(self):
+ self.assertEqual(True.real, 1)
+ self.assertEqual(True.imag, 0)
+ self.assertIs(type(True.real), int)
+ self.assertIs(type(True.imag), int)
+ self.assertEqual(False.real, 0)
+ self.assertEqual(False.imag, 0)
+ self.assertIs(type(False.real), int)
+ self.assertIs(type(False.imag), int)
+
def test_main():
support.run_unittest(BoolTest)
diff --git a/Lib/test/test_buffer.py b/Lib/test/test_buffer.py
new file mode 100644
index 0000000..26cd9be
--- /dev/null
+++ b/Lib/test/test_buffer.py
@@ -0,0 +1,4273 @@
+#
+# The ndarray object from _testbuffer.c is a complete implementation of
+# a PEP-3118 buffer provider. It is independent from NumPy's ndarray
+# and the tests don't require NumPy.
+#
+# If NumPy is present, some tests check both ndarray implementations
+# against each other.
+#
+# Most ndarray tests also check that memoryview(ndarray) behaves in
+# the same way as the original. Thus, a substantial part of the
+# memoryview tests is now in this module.
+#
+
+import unittest
+from test import support
+from itertools import permutations, product
+from random import randrange, sample, choice
+from sysconfig import get_config_var
+import warnings
+import sys, array, io
+from decimal import Decimal
+from fractions import Fraction
+
+try:
+ from _testbuffer import *
+except ImportError:
+ ndarray = None
+
+try:
+ import struct
+except ImportError:
+ struct = None
+
+try:
+ import ctypes
+except ImportError:
+ ctypes = None
+
+try:
+ with warnings.catch_warnings():
+ from numpy import ndarray as numpy_array
+except ImportError:
+ numpy_array = None
+
+
+SHORT_TEST = True
+
+
+# ======================================================================
+# Random lists by format specifier
+# ======================================================================
+
+# Native format chars and their ranges.
+NATIVE = {
+ '?':0, 'c':0, 'b':0, 'B':0,
+ 'h':0, 'H':0, 'i':0, 'I':0,
+ 'l':0, 'L':0, 'n':0, 'N':0,
+ 'f':0, 'd':0, 'P':0
+}
+
+# NumPy does not have 'n' or 'N':
+if numpy_array:
+ del NATIVE['n']
+ del NATIVE['N']
+
+if struct:
+ try:
+ # Add "qQ" if present in native mode.
+ struct.pack('Q', 2**64-1)
+ NATIVE['q'] = 0
+ NATIVE['Q'] = 0
+ except struct.error:
+ pass
+
+# Standard format chars and their ranges.
+STANDARD = {
+ '?':(0, 2), 'c':(0, 1<<8),
+ 'b':(-(1<<7), 1<<7), 'B':(0, 1<<8),
+ 'h':(-(1<<15), 1<<15), 'H':(0, 1<<16),
+ 'i':(-(1<<31), 1<<31), 'I':(0, 1<<32),
+ 'l':(-(1<<31), 1<<31), 'L':(0, 1<<32),
+ 'q':(-(1<<63), 1<<63), 'Q':(0, 1<<64),
+ 'f':(-(1<<63), 1<<63), 'd':(-(1<<1023), 1<<1023)
+}
+
+def native_type_range(fmt):
+ """Return range of a native type."""
+ if fmt == 'c':
+ lh = (0, 256)
+ elif fmt == '?':
+ lh = (0, 2)
+ elif fmt == 'f':
+ lh = (-(1<<63), 1<<63)
+ elif fmt == 'd':
+ lh = (-(1<<1023), 1<<1023)
+ else:
+ for exp in (128, 127, 64, 63, 32, 31, 16, 15, 8, 7):
+ try:
+ struct.pack(fmt, (1<<exp)-1)
+ break
+ except struct.error:
+ pass
+ lh = (-(1<<exp), 1<<exp) if exp & 1 else (0, 1<<exp)
+ return lh
+
+fmtdict = {
+ '':NATIVE,
+ '@':NATIVE,
+ '<':STANDARD,
+ '>':STANDARD,
+ '=':STANDARD,
+ '!':STANDARD
+}
+
+if struct:
+ for fmt in fmtdict['@']:
+ fmtdict['@'][fmt] = native_type_range(fmt)
+
+MEMORYVIEW = NATIVE.copy()
+ARRAY = NATIVE.copy()
+for k in NATIVE:
+ if not k in "bBhHiIlLfd":
+ del ARRAY[k]
+
+BYTEFMT = NATIVE.copy()
+for k in NATIVE:
+ if not k in "Bbc":
+ del BYTEFMT[k]
+
+fmtdict['m'] = MEMORYVIEW
+fmtdict['@m'] = MEMORYVIEW
+fmtdict['a'] = ARRAY
+fmtdict['b'] = BYTEFMT
+fmtdict['@b'] = BYTEFMT
+
+# Capabilities of the test objects:
+MODE = 0
+MULT = 1
+cap = { # format chars # multiplier
+ 'ndarray': (['', '@', '<', '>', '=', '!'], ['', '1', '2', '3']),
+ 'array': (['a'], ['']),
+ 'numpy': ([''], ['']),
+ 'memoryview': (['@m', 'm'], ['']),
+ 'bytefmt': (['@b', 'b'], ['']),
+}
+
+def randrange_fmt(mode, char, obj):
+ """Return random item for a type specified by a mode and a single
+ format character."""
+ x = randrange(*fmtdict[mode][char])
+ if char == 'c':
+ x = bytes(chr(x), 'latin1')
+ if char == '?':
+ x = bool(x)
+ if char == 'f' or char == 'd':
+ x = struct.pack(char, x)
+ x = struct.unpack(char, x)[0]
+ if obj == 'numpy' and x == b'\x00':
+ # http://projects.scipy.org/numpy/ticket/1925
+ x = b'\x01'
+ return x
+
+def gen_item(fmt, obj):
+ """Return single random item."""
+ mode, chars = fmt.split('#')
+ x = []
+ for c in chars:
+ x.append(randrange_fmt(mode, c, obj))
+ return x[0] if len(x) == 1 else tuple(x)
+
+def gen_items(n, fmt, obj):
+ """Return a list of random items (or a scalar)."""
+ if n == 0:
+ return gen_item(fmt, obj)
+ lst = [0] * n
+ for i in range(n):
+ lst[i] = gen_item(fmt, obj)
+ return lst
+
+def struct_items(n, obj):
+ mode = choice(cap[obj][MODE])
+ xfmt = mode + '#'
+ fmt = mode.strip('amb')
+ nmemb = randrange(2, 10) # number of struct members
+ for _ in range(nmemb):
+ char = choice(tuple(fmtdict[mode]))
+ multiplier = choice(cap[obj][MULT])
+ xfmt += (char * int(multiplier if multiplier else 1))
+ fmt += (multiplier + char)
+ items = gen_items(n, xfmt, obj)
+ item = gen_item(xfmt, obj)
+ return fmt, items, item
+
+def randitems(n, obj='ndarray', mode=None, char=None):
+ """Return random format, items, item."""
+ if mode is None:
+ mode = choice(cap[obj][MODE])
+ if char is None:
+ char = choice(tuple(fmtdict[mode]))
+ multiplier = choice(cap[obj][MULT])
+ fmt = mode + '#' + char * int(multiplier if multiplier else 1)
+ items = gen_items(n, fmt, obj)
+ item = gen_item(fmt, obj)
+ fmt = mode.strip('amb') + multiplier + char
+ return fmt, items, item
+
+def iter_mode(n, obj='ndarray'):
+ """Iterate through supported mode/char combinations."""
+ for mode in cap[obj][MODE]:
+ for char in fmtdict[mode]:
+ yield randitems(n, obj, mode, char)
+
+def iter_format(nitems, testobj='ndarray'):
+    """Yield (format, items, item) for every mode/format character of
+    'testobj'; for 'ndarray' also yield one random compound format."""
+    for t in iter_mode(nitems, testobj):
+        yield t
+    if testobj != 'ndarray':
+        return  # PEP 479: 'raise StopIteration' in a generator is a RuntimeError
+    yield struct_items(nitems, testobj)
+
+
+def is_byte_format(fmt):
+ return 'c' in fmt or 'b' in fmt or 'B' in fmt
+
+def is_memoryview_format(fmt):
+ """format suitable for memoryview"""
+ x = len(fmt)
+ return ((x == 1 or (x == 2 and fmt[0] == '@')) and
+ fmt[x-1] in MEMORYVIEW)
+
+NON_BYTE_FORMAT = [c for c in fmtdict['@'] if not is_byte_format(c)]
+
+
+# ======================================================================
+# Multi-dimensional tolist(), slicing and slice assignments
+# ======================================================================
+
+def atomp(lst):
+ """Tuple items (representing structs) are regarded as atoms."""
+ return not isinstance(lst, list)
+
+def listp(lst):
+ return isinstance(lst, list)
+
+def prod(lst):
+ """Product of list elements (0, not 1, for an empty list)."""
+ if len(lst) == 0:
+ return 0
+ x = lst[0]
+ for v in lst[1:]:
+ x *= v
+ return x
+
+def strides_from_shape(ndim, shape, itemsize, layout):
+ """Calculate strides of a contiguous array. Layout is 'C' or
+ 'F' (Fortran)."""
+ if ndim == 0:
+ return ()
+ if layout == 'C':
+ strides = list(shape[1:]) + [itemsize]
+ for i in range(ndim-2, -1, -1):
+ strides[i] *= strides[i+1]
+ else:
+ strides = [itemsize] + list(shape[:-1])
+ for i in range(1, ndim):
+ strides[i] *= strides[i-1]
+ return strides
+
+def _ca(items, s):
+ """Convert flat item list to the nested list representation of a
+ multidimensional C array with shape 's'."""
+ if atomp(items):
+ return items
+ if len(s) == 0:
+ return items[0]
+ lst = [0] * s[0]
+ stride = len(items) // s[0] if s[0] else 0
+ for i in range(s[0]):
+ start = i*stride
+ lst[i] = _ca(items[start:start+stride], s[1:])
+ return lst
+
+def _fa(items, s):
+ """Convert flat item list to the nested list representation of a
+ multidimensional Fortran array with shape 's'."""
+ if atomp(items):
+ return items
+ if len(s) == 0:
+ return items[0]
+ lst = [0] * s[0]
+ stride = s[0]
+ for i in range(s[0]):
+ lst[i] = _fa(items[i::stride], s[1:])
+ return lst
+
+def carray(items, shape):
+ if listp(items) and not 0 in shape and prod(shape) != len(items):
+ raise ValueError("prod(shape) != len(items)")
+ return _ca(items, shape)
+
+def farray(items, shape):
+ if listp(items) and not 0 in shape and prod(shape) != len(items):
+ raise ValueError("prod(shape) != len(items)")
+ return _fa(items, shape)
+
+def indices(shape):
+ """Generate all possible tuples of indices."""
+ iterables = [range(v) for v in shape]
+ return product(*iterables)
+
+def getindex(ndim, ind, strides):
+ """Convert multi-dimensional index to the position in the flat list."""
+ ret = 0
+ for i in range(ndim):
+ ret += strides[i] * ind[i]
+ return ret
+
+def transpose(src, shape):
+ """Transpose flat item list that is regarded as a multi-dimensional
+ matrix defined by shape: dest...[k][j][i] = src[i][j][k]... """
+ if not shape:
+ return src
+ ndim = len(shape)
+ sstrides = strides_from_shape(ndim, shape, 1, 'C')
+ dstrides = strides_from_shape(ndim, shape[::-1], 1, 'C')
+ dest = [0] * len(src)
+ for ind in indices(shape):
+ fr = getindex(ndim, ind, sstrides)
+ to = getindex(ndim, ind[::-1], dstrides)
+ dest[to] = src[fr]
+ return dest
+
+def _flatten(lst):
+ """flatten list"""
+ if lst == []:
+ return lst
+ if atomp(lst):
+ return [lst]
+ return _flatten(lst[0]) + _flatten(lst[1:])
+
+def flatten(lst):
+ """flatten list or return scalar"""
+ if atomp(lst): # scalar
+ return lst
+ return _flatten(lst)
+
+def slice_shape(lst, slices):
+ """Get the shape of lst after slicing: slices is a list of slice
+ objects."""
+ if atomp(lst):
+ return []
+ return [len(lst[slices[0]])] + slice_shape(lst[0], slices[1:])
+
+def multislice(lst, slices):
+ """Multi-dimensional slicing: slices is a list of slice objects."""
+ if atomp(lst):
+ return lst
+ return [multislice(sublst, slices[1:]) for sublst in lst[slices[0]]]
+
+def m_assign(llst, rlst, lslices, rslices):
+ """Multi-dimensional slice assignment: llst and rlst are the operands,
+ lslices and rslices are lists of slice objects. llst and rlst must
+ have the same structure.
+
+ For a two-dimensional example, this is not implemented in Python:
+
+ llst[0:3:2, 0:3:2] = rlst[1:3:1, 1:3:1]
+
+ Instead we write:
+
+ lslices = [slice(0,3,2), slice(0,3,2)]
+ rslices = [slice(1,3,1), slice(1,3,1)]
+ multislice_assign(llst, rlst, lslices, rslices)
+ """
+ if atomp(rlst):
+ return rlst
+ rlst = [m_assign(l, r, lslices[1:], rslices[1:])
+ for l, r in zip(llst[lslices[0]], rlst[rslices[0]])]
+ llst[lslices[0]] = rlst
+ return llst
+
+def cmp_structure(llst, rlst, lslices, rslices):
+ """Compare the structure of llst[lslices] and rlst[rslices]."""
+ lshape = slice_shape(llst, lslices)
+ rshape = slice_shape(rlst, rslices)
+ if (len(lshape) != len(rshape)):
+ return -1
+ for i in range(len(lshape)):
+ if lshape[i] != rshape[i]:
+ return -1
+ if lshape[i] == 0:
+ return 0
+ return 0
+
+def multislice_assign(llst, rlst, lslices, rslices):
+ """Return llst after assigning: llst[lslices] = rlst[rslices]"""
+ if cmp_structure(llst, rlst, lslices, rslices) < 0:
+ raise ValueError("lvalue and rvalue have different structures")
+ return m_assign(llst, rlst, lslices, rslices)
+
+
+# ======================================================================
+# Random structures
+# ======================================================================
+
+#
+# PEP-3118 is very permissive with respect to the contents of a
+# Py_buffer. In particular:
+#
+# - shape can be zero
+# - strides can be any integer, including zero
+# - offset can point to any location in the underlying
+# memory block, provided that it is a multiple of
+# itemsize.
+#
+# The functions in this section test and verify random structures
+# in full generality. A structure is valid iff it fits in the
+# underlying memory block.
+#
+# The structure 't' (short for 'tuple') is fully defined by:
+#
+# t = (memlen, itemsize, ndim, shape, strides, offset)
+#
+
+def verify_structure(memlen, itemsize, ndim, shape, strides, offset):
+ """Verify that the parameters represent a valid array within
+ the bounds of the allocated memory:
+ char *mem: start of the physical memory block
+ memlen: length of the physical memory block
+ offset: (char *)buf - mem
+ """
+ if offset % itemsize:
+ return False
+ if offset < 0 or offset+itemsize > memlen:
+ return False
+ if any(v % itemsize for v in strides):
+ return False
+
+ if ndim <= 0:
+ return ndim == 0 and not shape and not strides
+ if 0 in shape:
+ return True
+
+ imin = sum(strides[j]*(shape[j]-1) for j in range(ndim)
+ if strides[j] <= 0)
+ imax = sum(strides[j]*(shape[j]-1) for j in range(ndim)
+ if strides[j] > 0)
+
+ return 0 <= offset+imin and offset+imax+itemsize <= memlen
+
+def get_item(lst, indices):
+ for i in indices:
+ lst = lst[i]
+ return lst
+
+def memory_index(indices, t):
+ """Location of an item in the underlying memory."""
+ memlen, itemsize, ndim, shape, strides, offset = t
+ p = offset
+ for i in range(ndim):
+ p += strides[i]*indices[i]
+ return p
+
+def is_overlapping(t):
+ """The structure 't' is overlapping if at least one memory location
+ is visited twice while iterating through all possible tuples of
+ indices."""
+ memlen, itemsize, ndim, shape, strides, offset = t
+ visited = 1<<memlen
+ for ind in indices(shape):
+ i = memory_index(ind, t)
+ bit = 1<<i
+ if visited & bit:
+ return True
+ visited |= bit
+ return False
+
+def rand_structure(itemsize, valid, maxdim=5, maxshape=16, shape=()):
+ """Return random structure:
+ (memlen, itemsize, ndim, shape, strides, offset)
+ If 'valid' is true, the returned structure is valid, otherwise invalid.
+ If 'shape' is given, use that instead of creating a random shape.
+ """
+ if not shape:
+ ndim = randrange(maxdim+1)
+ if (ndim == 0):
+ if valid:
+ return itemsize, itemsize, ndim, (), (), 0
+ else:
+ nitems = randrange(1, 16+1)
+ memlen = nitems * itemsize
+ offset = -itemsize if randrange(2) == 0 else memlen
+ return memlen, itemsize, ndim, (), (), offset
+
+ minshape = 2
+ n = randrange(100)
+ if n >= 95 and valid:
+ minshape = 0
+ elif n >= 90:
+ minshape = 1
+ shape = [0] * ndim
+
+ for i in range(ndim):
+ shape[i] = randrange(minshape, maxshape+1)
+ else:
+ ndim = len(shape)
+
+ maxstride = 5
+ n = randrange(100)
+ zero_stride = True if n >= 95 and n & 1 else False
+
+ strides = [0] * ndim
+ strides[ndim-1] = itemsize * randrange(-maxstride, maxstride+1)
+ if not zero_stride and strides[ndim-1] == 0:
+ strides[ndim-1] = itemsize
+
+ for i in range(ndim-2, -1, -1):
+ maxstride *= shape[i+1] if shape[i+1] else 1
+ if zero_stride:
+ strides[i] = itemsize * randrange(-maxstride, maxstride+1)
+ else:
+ strides[i] = ((1,-1)[randrange(2)] *
+ itemsize * randrange(1, maxstride+1))
+
+ imin = imax = 0
+ if not 0 in shape:
+ imin = sum(strides[j]*(shape[j]-1) for j in range(ndim)
+ if strides[j] <= 0)
+ imax = sum(strides[j]*(shape[j]-1) for j in range(ndim)
+ if strides[j] > 0)
+
+ nitems = imax - imin
+ if valid:
+ offset = -imin * itemsize
+ memlen = offset + (imax+1) * itemsize
+ else:
+ memlen = (-imin + imax) * itemsize
+ offset = -imin-itemsize if randrange(2) == 0 else memlen
+ return memlen, itemsize, ndim, shape, strides, offset
+
+def randslice_from_slicelen(slicelen, listlen):
+ """Create a random slice of len slicelen that fits into listlen."""
+ maxstart = listlen - slicelen
+ start = randrange(maxstart+1)
+ maxstep = (listlen - start) // slicelen if slicelen else 1
+ step = randrange(1, maxstep+1)
+ stop = start + slicelen * step
+ s = slice(start, stop, step)
+ _, _, _, control = slice_indices(s, listlen)
+ if control != slicelen:
+ raise RuntimeError
+ return s
+
+def randslice_from_shape(ndim, shape):
+ """Create two sets of slices for an array x with shape 'shape'
+ such that shapeof(x[lslices]) == shapeof(x[rslices])."""
+ lslices = [0] * ndim
+ rslices = [0] * ndim
+ for n in range(ndim):
+ l = shape[n]
+ slicelen = randrange(1, l+1) if l > 0 else 0
+ lslices[n] = randslice_from_slicelen(slicelen, l)
+ rslices[n] = randslice_from_slicelen(slicelen, l)
+ return tuple(lslices), tuple(rslices)
+
+def rand_aligned_slices(maxdim=5, maxshape=16):
+ """Create (lshape, rshape, tuple(lslices), tuple(rslices)) such that
+ shapeof(x[lslices]) == shapeof(y[rslices]), where x is an array
+ with shape 'lshape' and y is an array with shape 'rshape'."""
+ ndim = randrange(1, maxdim+1)
+ minshape = 2
+ n = randrange(100)
+ if n >= 95:
+ minshape = 0
+ elif n >= 90:
+ minshape = 1
+ all_random = True if randrange(100) >= 80 else False
+ lshape = [0]*ndim; rshape = [0]*ndim
+ lslices = [0]*ndim; rslices = [0]*ndim
+
+ for n in range(ndim):
+ small = randrange(minshape, maxshape+1)
+ big = randrange(minshape, maxshape+1)
+ if big < small:
+ big, small = small, big
+
+ # Create a slice that fits the smaller value.
+ if all_random:
+ start = randrange(-small, small+1)
+ stop = randrange(-small, small+1)
+ step = (1,-1)[randrange(2)] * randrange(1, small+2)
+ s_small = slice(start, stop, step)
+ _, _, _, slicelen = slice_indices(s_small, small)
+ else:
+ slicelen = randrange(1, small+1) if small > 0 else 0
+ s_small = randslice_from_slicelen(slicelen, small)
+
+ # Create a slice of the same length for the bigger value.
+ s_big = randslice_from_slicelen(slicelen, big)
+ if randrange(2) == 0:
+ rshape[n], lshape[n] = big, small
+ rslices[n], lslices[n] = s_big, s_small
+ else:
+ rshape[n], lshape[n] = small, big
+ rslices[n], lslices[n] = s_small, s_big
+
+ return lshape, rshape, tuple(lslices), tuple(rslices)
+
+def randitems_from_structure(fmt, t):
+ """Return memlen//itemsize random items for structure 't' with format
+ 'fmt', generated in native mode ('#'+fmt) for the 'numpy' object."""
+ memlen, itemsize, _, _, _, _ = t
+ return gen_items(memlen//itemsize, '#'+fmt, 'numpy')
+
+def ndarray_from_structure(items, fmt, t, flags=0):
+ """Return ndarray from the tuple returned by rand_structure()"""
+ memlen, itemsize, ndim, shape, strides, offset = t
+ return ndarray(items, shape=shape, strides=strides, format=fmt,
+ offset=offset, flags=ND_WRITABLE|flags)
+
+def numpy_array_from_structure(items, fmt, t):
+ """Return numpy_array from the tuple returned by rand_structure()"""
+ memlen, itemsize, ndim, shape, strides, offset = t
+ buf = bytearray(memlen)
+ for j, v in enumerate(items):
+ struct.pack_into(fmt, buf, j*itemsize, v)
+ return numpy_array(buffer=buf, shape=shape, strides=strides,
+ dtype=fmt, offset=offset)
+
+
+# ======================================================================
+# memoryview casts
+# ======================================================================
+
+def cast_items(exporter, fmt, itemsize, shape=None):
+ """Interpret the raw memory of 'exporter' as a list of items with
+ size 'itemsize'. If shape=None, the new structure is assumed to
+ be 1-D with n * itemsize = bytelen. If shape is given, the usual
+ constraint for contiguous arrays prod(shape) * itemsize = bytelen
+ applies. On success, return (items, shape). If the constraints
+ cannot be met, return (None, shape). If a chunk of bytes is interpreted
+ as NaN as a result of float conversion, return ('nan', shape)."""
+ bytelen = exporter.nbytes
+ if shape:
+ if prod(shape) * itemsize != bytelen:
+ return None, shape
+ elif shape == []:
+ if exporter.ndim == 0 or itemsize != bytelen:
+ return None, shape
+ else:
+ n, r = divmod(bytelen, itemsize)
+ shape = [n]
+ if r != 0:
+ return None, shape
+
+ mem = exporter.tobytes()
+ byteitems = [mem[i:i+itemsize] for i in range(0, len(mem), itemsize)]
+
+ items = []
+ for v in byteitems:
+ item = struct.unpack(fmt, v)[0]
+ if item != item: # only NaN compares unequal to itself
+ return 'nan', shape
+ items.append(item)
+
+ return (items, shape) if shape != [] else (items[0], shape)
+
+def gencastshapes():
+ """Generate shapes to test casting."""
+ for n in range(32):
+ yield [n]
+ ndim = randrange(4, 6)
+ minshape = 1 if randrange(100) > 80 else 2
+ yield [randrange(minshape, 5) for _ in range(ndim)]
+ ndim = randrange(2, 4)
+ minshape = 1 if randrange(100) > 80 else 2
+ yield [randrange(minshape, 5) for _ in range(ndim)]
+
+
+# ======================================================================
+# Actual tests
+# ======================================================================
+
+def genslices(n):
+ """Generate all possible slices for a single dimension."""
+ return product(range(-n, n+1), range(-n, n+1), range(-n, n+1))
+
+def genslices_ndim(ndim, shape):
+ """Generate all possible slice tuples for 'shape'."""
+ iterables = [genslices(shape[n]) for n in range(ndim)]
+ return product(*iterables)
+
+def rslice(n, allow_empty=False):
+ """Generate random slice for a single dimension of length n.
+ If allow_empty is true (or n == 0), the slice may be empty,
+ otherwise it will be non-empty."""
+ minlen = 0 if allow_empty or n == 0 else 1
+ slicelen = randrange(minlen, n+1)
+ return randslice_from_slicelen(slicelen, n)
+
+def rslices(n, allow_empty=False):
+ """Generate random slices for a single dimension."""
+ for _ in range(5):
+ yield rslice(n, allow_empty)
+
+def rslices_ndim(ndim, shape, iterations=5):
+ """Generate random slice tuples for 'shape'."""
+ # non-empty slices
+ for _ in range(iterations):
+ yield tuple(rslice(shape[n]) for n in range(ndim))
+ # possibly empty slices
+ for _ in range(iterations):
+ yield tuple(rslice(shape[n], allow_empty=True) for n in range(ndim))
+ # invalid slices
+ yield tuple(slice(0,1,0) for _ in range(ndim))
+
+def rpermutation(iterable, r=None):
+ pool = tuple(iterable)
+ r = len(pool) if r is None else r
+ yield tuple(sample(pool, r))
+
+def ndarray_print(nd):
+ """Print ndarray for debugging."""
+ try:
+ x = nd.tolist()
+ except (TypeError, NotImplementedError):
+ x = nd.tobytes()
+ if isinstance(nd, ndarray):
+ offset = nd.offset
+ flags = nd.flags
+ else:
+ offset = 'unknown'
+ flags = 'unknown'
+ print("ndarray(%s, shape=%s, strides=%s, suboffsets=%s, offset=%s, "
+ "format='%s', itemsize=%s, flags=%s)" %
+ (x, nd.shape, nd.strides, nd.suboffsets, offset,
+ nd.format, nd.itemsize, flags))
+ sys.stdout.flush()
+
+
+ITERATIONS = 100
+MAXDIM = 5
+MAXSHAPE = 10
+
+if SHORT_TEST:
+ ITERATIONS = 10
+ MAXDIM = 3
+ MAXSHAPE = 4
+ genslices = rslices
+ genslices_ndim = rslices_ndim
+ permutations = rpermutation
+
+
+@unittest.skipUnless(struct, 'struct module required for this test.')
+@unittest.skipUnless(ndarray, 'ndarray object required for this test')
+class TestBufferProtocol(unittest.TestCase):
+
+ def setUp(self):
+ # The suboffsets tests need sizeof(void *).
+ self.sizeof_void_p = get_sizeof_void_p()
+
+ def verify(self, result, obj=-1,
+ itemsize={1}, fmt=-1, readonly={1},
+ ndim={1}, shape=-1, strides=-1,
+ lst=-1, sliced=False, cast=False):
+ # Verify buffer contents against expected values. Default values
+ # are deliberately initialized to invalid types.
+ if shape:
+ expected_len = prod(shape)*itemsize
+ else:
+ if not fmt: # array has been implicitly cast to unsigned bytes
+ expected_len = len(lst)
+ else: # ndim = 0
+ expected_len = itemsize
+
+ # Reconstruct suboffsets from strides. Support for slicing
+ # could be added, but is currently only needed for test_getbuf().
+ suboffsets = ()
+ if result.suboffsets:
+ self.assertGreater(ndim, 0)
+
+ suboffset0 = 0
+ for n in range(1, ndim):
+ if shape[n] == 0:
+ break
+ if strides[n] <= 0:
+ suboffset0 += -strides[n] * (shape[n]-1)
+
+ suboffsets = [suboffset0] + [-1 for v in range(ndim-1)]
+
+ # Not correct if slicing has occurred in the first dimension.
+ stride0 = self.sizeof_void_p
+ if strides[0] < 0:
+ stride0 = -stride0
+ strides = [stride0] + list(strides[1:])
+
+ self.assertIs(result.obj, obj)
+ self.assertEqual(result.nbytes, expected_len)
+ self.assertEqual(result.itemsize, itemsize)
+ self.assertEqual(result.format, fmt)
+ self.assertEqual(result.readonly, readonly)
+ self.assertEqual(result.ndim, ndim)
+ self.assertEqual(result.shape, tuple(shape))
+ if not (sliced and suboffsets):
+ self.assertEqual(result.strides, tuple(strides))
+ self.assertEqual(result.suboffsets, tuple(suboffsets))
+
+ if isinstance(result, ndarray) or is_memoryview_format(fmt):
+ rep = result.tolist() if fmt else result.tobytes()
+ self.assertEqual(rep, lst)
+
+ if not fmt: # array has been cast to unsigned bytes,
+ return # the remaining tests won't work.
+
+ # PyBuffer_GetPointer() is the definition how to access an item.
+ # If PyBuffer_GetPointer(indices) is correct for all possible
+ # combinations of indices, the buffer is correct.
+ #
+ # Also test tobytes() against the flattened 'lst', with all items
+ # packed to bytes.
+ if not cast: # casts chop up 'lst' in different ways
+ b = bytearray()
+ buf_err = None
+ for ind in indices(shape):
+ try:
+ item1 = get_pointer(result, ind)
+ item2 = get_item(lst, ind)
+ if isinstance(item2, tuple):
+ x = struct.pack(fmt, *item2)
+ else:
+ x = struct.pack(fmt, item2)
+ b.extend(x)
+ except BufferError:
+ buf_err = True # re-exporter does not provide full buffer
+ break
+ self.assertEqual(item1, item2)
+
+ if not buf_err:
+ # test tobytes()
+ self.assertEqual(result.tobytes(), b)
+
+ # lst := expected multi-dimensional logical representation
+ # flatten(lst) := elements in C-order
+ ff = fmt if fmt else 'B'
+ flattened = flatten(lst)
+
+ # Rules for 'A': if the array is already contiguous, return
+ # the array unaltered. Otherwise, return a contiguous 'C'
+ # representation.
+ for order in ['C', 'F', 'A']:
+ expected = result
+ if order == 'F':
+ if not is_contiguous(result, 'A') or \
+ is_contiguous(result, 'C'):
+ # For constructing the ndarray, convert the
+ # flattened logical representation to Fortran order.
+ trans = transpose(flattened, shape)
+ expected = ndarray(trans, shape=shape, format=ff,
+ flags=ND_FORTRAN)
+ else: # 'C', 'A'
+ if not is_contiguous(result, 'A') or \
+ is_contiguous(result, 'F') and order == 'C':
+ # The flattened list is already in C-order.
+ expected = ndarray(flattened, shape=shape, format=ff)
+
+ contig = get_contiguous(result, PyBUF_READ, order)
+ self.assertEqual(contig.tobytes(), b)
+ self.assertTrue(cmp_contig(contig, expected))
+
+ if ndim == 0:
+ continue
+
+ nmemb = len(flattened)
+ ro = 0 if readonly else ND_WRITABLE
+
+ ### See comment in test_py_buffer_to_contiguous for an
+ ### explanation why these tests are valid.
+
+ # To 'C'
+ contig = py_buffer_to_contiguous(result, 'C', PyBUF_FULL_RO)
+ self.assertEqual(len(contig), nmemb * itemsize)
+ initlst = [struct.unpack_from(fmt, contig, n*itemsize)
+ for n in range(nmemb)]
+ if len(initlst[0]) == 1:
+ initlst = [v[0] for v in initlst]
+
+ y = ndarray(initlst, shape=shape, flags=ro, format=fmt)
+ self.assertEqual(memoryview(y), memoryview(result))
+
+ # To 'F'
+ contig = py_buffer_to_contiguous(result, 'F', PyBUF_FULL_RO)
+ self.assertEqual(len(contig), nmemb * itemsize)
+ initlst = [struct.unpack_from(fmt, contig, n*itemsize)
+ for n in range(nmemb)]
+ if len(initlst[0]) == 1:
+ initlst = [v[0] for v in initlst]
+
+ y = ndarray(initlst, shape=shape, flags=ro|ND_FORTRAN,
+ format=fmt)
+ self.assertEqual(memoryview(y), memoryview(result))
+
+ # To 'A'
+ contig = py_buffer_to_contiguous(result, 'A', PyBUF_FULL_RO)
+ self.assertEqual(len(contig), nmemb * itemsize)
+ initlst = [struct.unpack_from(fmt, contig, n*itemsize)
+ for n in range(nmemb)]
+ if len(initlst[0]) == 1:
+ initlst = [v[0] for v in initlst]
+
+ f = ND_FORTRAN if is_contiguous(result, 'F') else 0
+ y = ndarray(initlst, shape=shape, flags=f|ro, format=fmt)
+ self.assertEqual(memoryview(y), memoryview(result))
+
+ if is_memoryview_format(fmt):
+ try:
+ m = memoryview(result)
+ except BufferError: # re-exporter does not provide full information
+ return
+ ex = result.obj if isinstance(result, memoryview) else result
+ self.assertIs(m.obj, ex)
+ self.assertEqual(m.nbytes, expected_len)
+ self.assertEqual(m.itemsize, itemsize)
+ self.assertEqual(m.format, fmt)
+ self.assertEqual(m.readonly, readonly)
+ self.assertEqual(m.ndim, ndim)
+ self.assertEqual(m.shape, tuple(shape))
+ if not (sliced and suboffsets):
+ self.assertEqual(m.strides, tuple(strides))
+ self.assertEqual(m.suboffsets, tuple(suboffsets))
+
+ n = 1 if ndim == 0 else len(lst)
+ self.assertEqual(len(m), n)
+
+ rep = result.tolist() if fmt else result.tobytes()
+ self.assertEqual(rep, lst)
+ self.assertEqual(m, result)
+
+ def verify_getbuf(self, orig_ex, ex, req, sliced=False):
+     """Request a buffer from `ex` with flags `req` and check the result.
+
+     When one of the conditions listed below holds, the only check is
+     that ndarray(ex, getbuf=req) raises BufferError.  Otherwise the
+     expected buffer fields are derived from `ex` and `orig_ex` and
+     handed to self.verify() together with the new consumer.
+
+     orig_ex: supplies the expected shape and strides and, for requests
+              without PyBUF_FORMAT, the expected raw bytes.
+     ex:      object that is asked for the buffer.
+     req:     getbuf request flags.
+     sliced:  passed through unchanged to self.verify().
+     """
+     def match(flag):
+         # True if every bit of 'flag' is set in the request.
+         return (req & flag) == flag
+
+     if (# writable request to read-only exporter
+         (ex.readonly and match(PyBUF_WRITABLE)) or
+         # cannot match explicit contiguity request
+         (match(PyBUF_C_CONTIGUOUS) and not ex.c_contiguous) or
+         (match(PyBUF_F_CONTIGUOUS) and not ex.f_contiguous) or
+         (match(PyBUF_ANY_CONTIGUOUS) and not ex.contiguous) or
+         # buffer needs suboffsets
+         (not match(PyBUF_INDIRECT) and ex.suboffsets) or
+         # buffer without strides must be C-contiguous
+         (not match(PyBUF_STRIDES) and not ex.c_contiguous) or
+         # PyBUF_SIMPLE|PyBUF_FORMAT and PyBUF_WRITABLE|PyBUF_FORMAT
+         (not match(PyBUF_ND) and match(PyBUF_FORMAT))):
+
+         self.assertRaises(BufferError, ndarray, ex, getbuf=req)
+         return
+
+     if isinstance(ex, ndarray) or is_memoryview_format(ex.format):
+         lst = ex.tolist()
+     else:
+         # Obtain the list representation through a full consumer.
+         lst = ndarray(ex, getbuf=PyBUF_FULL_RO).tolist()
+
+     # The consumer may have requested default values or a NULL format.
+     ro = 0 if match(PyBUF_WRITABLE) else ex.readonly
+     fmt = ex.format
+     itemsize = ex.itemsize
+     ndim = ex.ndim
+     if not match(PyBUF_FORMAT):
+         # itemsize refers to the original itemsize before the cast.
+         # The equality product(shape) * itemsize = len still holds.
+         # The equality calcsize(format) = itemsize does _not_ hold.
+         fmt = ''
+         lst = orig_ex.tobytes() # Issue 12834
+     if match(PyBUF_ND):
+         shape = orig_ex.shape
+     else:
+         # Without PyBUF_ND the expected result is ndim 1, empty shape.
+         ndim = 1
+         shape = ()
+     strides = orig_ex.strides if match(PyBUF_STRIDES) else ()
+
+     nd = ndarray(ex, getbuf=req)
+     self.verify(nd, obj=ex,
+                 itemsize=itemsize, fmt=fmt, readonly=ro,
+                 ndim=ndim, shape=shape, strides=strides,
+                 lst=lst, sliced=sliced)
+
+ def test_ndarray_getbuf(self):
+     # Run verify_getbuf() for every combination of request flags,
+     # item format, array structure and ndarray creation flags, both
+     # for the exporter itself and for a memoryview of it.
+
+     # distinct flags, followed by compound requests
+     requests = (
+         PyBUF_INDIRECT, PyBUF_STRIDES, PyBUF_ND, PyBUF_SIMPLE,
+         PyBUF_C_CONTIGUOUS, PyBUF_F_CONTIGUOUS, PyBUF_ANY_CONTIGUOUS,
+         PyBUF_FULL, PyBUF_FULL_RO,
+         PyBUF_RECORDS, PyBUF_RECORDS_RO,
+         PyBUF_STRIDED, PyBUF_STRIDED_RO,
+         PyBUF_CONTIG, PyBUF_CONTIG_RO,
+     )
+     # (items, format)
+     one_to_twelve = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+     items_fmt = (
+         ([bool(x % 2) for x in range(12)], '?'),
+         (list(one_to_twelve), 'b'),
+         (list(one_to_twelve), 'B'),
+         ([(2**31-x) if x % 2 else (-2**31+x) for x in range(12)], 'l'),
+     )
+     # (shape, strides, offset); strides and offset in units of items
+     structure = (
+         ([], [], 0),
+         ([12], [], 0),
+         ([12], [-1], 11),
+         ([6], [2], 0),
+         ([6], [-2], 11),
+         ([3, 4], [], 0),
+         ([3, 4], [-4, -1], 11),
+         ([2, 2], [4, 1], 4),
+         ([2, 2], [-4, -1], 8),
+     )
+     # ndarray creation flags
+     ndflags = (
+         0, ND_WRITABLE, ND_FORTRAN, ND_FORTRAN|ND_WRITABLE,
+         ND_PIL, ND_PIL|ND_WRITABLE,
+     )
+     # flags that can actually be used as flags
+     real_flags = (0, PyBUF_WRITABLE, PyBUF_FORMAT,
+                   PyBUF_WRITABLE|PyBUF_FORMAT)
+
+     for items, fmt in items_fmt:
+         itemsize = struct.calcsize(fmt)
+         for shape, item_strides, item_offset in structure:
+             # Convert from item units to byte units.
+             strides = [step * itemsize for step in item_strides]
+             offset = item_offset * itemsize
+             for flags in ndflags:
+
+                 # These flag/structure combinations are skipped.
+                 if strides and (flags&ND_FORTRAN):
+                     continue
+                 if not shape and (flags&ND_PIL):
+                     continue
+
+                 # A scalar (empty shape) takes a single item.
+                 initializer = items if shape else items[0]
+                 ex1 = ndarray(initializer, format=fmt, flags=flags,
+                               shape=shape, strides=strides, offset=offset)
+                 ex2 = ex1[::-2] if shape else None
+
+                 m1 = memoryview(ex1)
+                 if ex2:
+                     m2 = memoryview(ex2)
+                 if ex1.ndim == 0 or (ex1.ndim == 1 and shape and strides):
+                     self.assertEqual(m1, ex1)
+                 if ex2 and ex2.ndim == 1 and shape and strides:
+                     self.assertEqual(m2, ex2)
+
+                 for req in requests:
+                     for bits in real_flags:
+                         request = req|bits
+                         self.verify_getbuf(ex1, ex1, request)
+                         self.verify_getbuf(ex1, m1, request)
+                         if ex2:
+                             self.verify_getbuf(ex2, ex2, request,
+                                                sliced=True)
+                             self.verify_getbuf(ex2, m2, request,
+                                                sliced=True)
+
+     items = [1,2,3,4,5,6,7,8,9,10,11,12]
+     raises = self.assertRaises
+
+     # ND_GETBUF_FAIL
+     ex = ndarray(items, shape=[12], flags=ND_GETBUF_FAIL)
+     raises(BufferError, ndarray, ex)
+
+     # Request complex structure from a simple exporter. In this
+     # particular case the test object is not PEP-3118 compliant.
+     base = ndarray([9], [1])
+     ex = ndarray(base, getbuf=PyBUF_SIMPLE)
+     raises(BufferError, ndarray, ex, getbuf=PyBUF_WRITABLE)
+     raises(BufferError, ndarray, ex, getbuf=PyBUF_ND)
+     raises(BufferError, ndarray, ex, getbuf=PyBUF_STRIDES)
+     raises(BufferError, ndarray, ex, getbuf=PyBUF_C_CONTIGUOUS)
+     raises(BufferError, ndarray, ex, getbuf=PyBUF_F_CONTIGUOUS)
+     raises(BufferError, ndarray, ex, getbuf=PyBUF_ANY_CONTIGUOUS)
+     # A PyBUF_SIMPLE request must succeed.
+     nd = ndarray(ex, getbuf=PyBUF_SIMPLE)
+
+ def test_ndarray_exceptions(self):
+     # Check the exception types raised for invalid arguments to the
+     # ndarray constructor, push()/pop() and the helper functions.
+     raises = self.assertRaises
+
+     nd = ndarray([9], [1])
+     ndm = ndarray([9], [1], flags=ND_VAREXPORT)
+
+     # Initialization of a new ndarray or mutation of an existing array.
+     for ctor in (ndarray, nd.push, ndm.push):
+         # Invalid types.
+         raises(TypeError, ctor, {1,2,3})
+         raises(TypeError, ctor, [1,2,'3'])
+         raises(TypeError, ctor, [1,2,(3,4)])
+         raises(TypeError, ctor, [1,2,3], shape={3})
+         raises(TypeError, ctor, [1,2,3], shape=[3], strides={1})
+         raises(TypeError, ctor, [1,2,3], shape=[3], offset=[])
+         raises(TypeError, ctor, [1], shape=[1], format={})
+         raises(TypeError, ctor, [1], shape=[1], flags={})
+         raises(TypeError, ctor, [1], shape=[1], getbuf={})
+
+         # ND_FORTRAN flag is only valid without strides.
+         raises(TypeError, ctor, [1], shape=[1], strides=[1],
+                flags=ND_FORTRAN)
+
+         # ND_PIL flag is only valid with ndim > 0.
+         raises(TypeError, ctor, [1], shape=[], flags=ND_PIL)
+
+         # Invalid items.
+         raises(ValueError, ctor, [], shape=[1])
+         raises(ValueError, ctor, ['XXX'], shape=[1], format="L")
+         # Invalid combination of items and format.
+         raises(struct.error, ctor, [1000], shape=[1], format="B")
+         raises(ValueError, ctor, [1,(2,3)], shape=[2], format="B")
+         raises(ValueError, ctor, [1,2,3], shape=[3], format="QL")
+
+         # Invalid ndim.
+         too_many_dims = ND_MAX_NDIM+1
+         raises(ValueError, ctor, [1]*too_many_dims,
+                shape=[1]*too_many_dims)
+
+         # Invalid shape.
+         raises(ValueError, ctor, [1], shape=[-1])
+         raises(ValueError, ctor, [1,2,3], shape=['3'])
+         raises(OverflowError, ctor, [1], shape=[2**128])
+         # prod(shape) * itemsize != len(items)
+         raises(ValueError, ctor, [1,2,3,4,5], shape=[2,2], offset=3)
+
+         # Invalid strides.
+         raises(ValueError, ctor, [1,2,3], shape=[3], strides=['1'])
+         raises(OverflowError, ctor, [1], shape=[1], strides=[2**128])
+
+         # Invalid combination of strides and shape.
+         raises(ValueError, ctor, [1,2], shape=[2,1], strides=[1])
+         # Invalid combination of strides and format.
+         raises(ValueError, ctor, [1,2,3,4], shape=[2], strides=[3],
+                format="L")
+
+         # Invalid offset.
+         raises(ValueError, ctor, [1,2,3], shape=[3], offset=4)
+         raises(ValueError, ctor, [1,2,3], shape=[1], offset=3,
+                format="L")
+
+         # Invalid format.
+         raises(ValueError, ctor, [1,2,3], shape=[3], format="")
+         raises(struct.error, ctor, [(1,2,3)], shape=[1],
+                format="@#$")
+
+         # Striding out of the memory bounds.
+         items = [1,2,3,4,5,6,7,8,9,10]
+         raises(ValueError, ctor, items, shape=[2,3],
+                strides=[-3, -2], offset=5)
+
+         # Constructing consumer: format argument invalid.
+         raises(TypeError, ctor, bytearray(), format="Q")
+
+         # Constructing original base object: getbuf argument invalid.
+         raises(TypeError, ctor, [1], shape=[1], getbuf=PyBUF_FULL)
+
+         # Shape argument is mandatory for original base objects.
+         raises(TypeError, ctor, [1])
+
+
+     # PyBUF_WRITABLE request to read-only provider.
+     raises(BufferError, ndarray, b'123', getbuf=PyBUF_WRITABLE)
+
+     # ND_VAREXPORT can only be specified during construction.
+     nd = ndarray([9], [1], flags=ND_VAREXPORT)
+     raises(ValueError, nd.push, [1], [1], flags=ND_VAREXPORT)
+
+     # Invalid operation for consumers: push/pop
+     nd = ndarray(b'123')
+     raises(BufferError, nd.push, [1], [1])
+     raises(BufferError, nd.pop)
+
+     # ND_VAREXPORT not set: push/pop fail with exported buffers
+     nd = ndarray([9], [1])
+     nd.push([1], [1])
+     m = memoryview(nd)
+     raises(BufferError, nd.push, [1], [1])
+     raises(BufferError, nd.pop)
+     m.release()
+     nd.pop()
+
+     # Single remaining buffer: pop fails
+     raises(BufferError, nd.pop)
+     del nd
+
+     # get_pointer()
+     raises(TypeError, get_pointer, {}, [1,2,3])
+     raises(TypeError, get_pointer, b'123', {})
+
+     nd = ndarray(list(range(100)), shape=[1]*100)
+     raises(ValueError, get_pointer, nd, [5])
+
+     nd = ndarray(list(range(12)), shape=[3,4])
+     raises(ValueError, get_pointer, nd, [2,3,4])
+     raises(ValueError, get_pointer, nd, [3,3])
+     raises(ValueError, get_pointer, nd, [-3,3])
+     raises(OverflowError, get_pointer, nd, [1<<64,3])
+
+     # tolist() needs format
+     ex = ndarray([1,2,3], shape=[3], format='L')
+     nd = ndarray(ex, getbuf=PyBUF_SIMPLE)
+     raises(ValueError, nd.tolist)
+
+     # memoryview_from_buffer()
+     ex1 = ndarray([1,2,3], shape=[3], format='L')
+     ex2 = ndarray(ex1)
+     nd = ndarray(ex2)
+     raises(TypeError, nd.memoryview_from_buffer)
+
+     nd = ndarray([(1,)*200], shape=[1], format='L'*200)
+     raises(TypeError, nd.memoryview_from_buffer)
+
+     max_dims = ND_MAX_NDIM
+     nd = ndarray(list(range(max_dims)), shape=[1]*max_dims)
+     raises(ValueError, nd.memoryview_from_buffer)
+
+     # get_contiguous()
+     nd = ndarray([1], shape=[1])
+     raises(TypeError, get_contiguous, 1, 2, 3, 4, 5)
+     raises(TypeError, get_contiguous, nd, "xyz", 'C')
+     raises(OverflowError, get_contiguous, nd, 2**64, 'C')
+     raises(TypeError, get_contiguous, nd, PyBUF_READ, 961)
+     raises(UnicodeEncodeError, get_contiguous, nd, PyBUF_READ,
+            '\u2007')
+     raises(ValueError, get_contiguous, nd, PyBUF_READ, 'Z')
+     raises(ValueError, get_contiguous, nd, 255, 'A')
+
+     # cmp_contig()
+     nd = ndarray([1], shape=[1])
+     raises(TypeError, cmp_contig, 1, 2, 3, 4, 5)
+     raises(TypeError, cmp_contig, {}, nd)
+     raises(TypeError, cmp_contig, nd, {})
+
+     # is_contiguous()
+     nd = ndarray([1], shape=[1])
+     raises(TypeError, is_contiguous, 1, 2, 3, 4, 5)
+     raises(TypeError, is_contiguous, {}, 'A')
+     raises(TypeError, is_contiguous, nd, 201)
+
+ def test_ndarray_linked_list(self):
+     # Export five buffers from one ND_VAREXPORT ndarray (one per
+     # push) and release the memoryviews in every possible order.
+     # Afterwards pop() must still fail with BufferError.
+     for release_order in permutations(range(5)):
+         nd = ndarray([1,2,3], shape=[3], flags=ND_VAREXPORT)
+         views = [memoryview(nd)]
+
+         for _ in range(4):
+             nd.push([1,2,3], shape=[3])
+             views.append(memoryview(nd))
+
+         for index in release_order:
+             views[index].release()
+
+         self.assertRaises(BufferError, nd.pop)
+         del nd
+
+ def test_ndarray_format_scalar(self):
+     # ndim = 0: build a scalar ndarray for every format yielded by
+     # iter_format(0) and verify all buffer fields.
+     for fmt, scalar, _ in iter_format(0):
+         nd = ndarray(scalar, shape=(), format=fmt)
+         expected = dict(itemsize=struct.calcsize(fmt), fmt=fmt,
+                         readonly=1, ndim=0, shape=(), strides=(),
+                         lst=scalar)
+         self.verify(nd, obj=None, **expected)
+
+ def test_ndarray_format_shape(self):
+     # ndim = 1, shape = [n]: one-dimensional arrays of a random
+     # length, created with and without the ND_PIL flag.
+     nitems = randrange(1, 10)
+     for fmt, items, _ in iter_format(nitems):
+         size = struct.calcsize(fmt)
+         for flags in (0, ND_PIL):
+             nd = ndarray(items, shape=[nitems], format=fmt, flags=flags)
+             self.verify(
+                 nd, obj=None,
+                 itemsize=size, fmt=fmt, readonly=1,
+                 ndim=1, shape=(nitems,), strides=(size,),
+                 lst=items,
+             )
+
+ def test_ndarray_format_strides(self):
+     # ndim = 1, strides: every non-zero step in [-5, 5) is applied to
+     # a one-dimensional array and compared with list slicing.
+     nitems = randrange(1, 30)
+     nonzero_steps = [s for s in range(-5, 5) if s != 0]
+     for fmt, items, _ in iter_format(nitems):
+         itemsize = struct.calcsize(fmt)
+         for step in nonzero_steps:
+             expected = items[::step]
+             shape = [len(expected)]
+             strides = [step*itemsize]
+             # A negative step starts at the last item.
+             if step < 0:
+                 offset = itemsize*(nitems-1)
+             else:
+                 offset = 0
+
+             for flags in (0, ND_PIL):
+                 nd = ndarray(items, shape=shape, strides=strides,
+                              format=fmt, offset=offset, flags=flags)
+                 self.verify(nd, obj=None,
+                             itemsize=itemsize, fmt=fmt, readonly=1,
+                             ndim=1, shape=shape, strides=strides,
+                             lst=items[::step])
+
+ def test_ndarray_fortran(self):
+     # A (3, 4) exporter with strides (1, 3) must satisfy a
+     # PyBUF_F_CONTIGUOUS request; the consumer's list representation
+     # is compared with farray().
+     shape = (3, 4)
+     items = list(range(1, 13))
+     ex = ndarray(items, shape=shape, strides=(1, 3))
+     nd = ndarray(ex, getbuf=PyBUF_F_CONTIGUOUS|PyBUF_FORMAT)
+     expected = farray(items, (3, 4))
+     self.assertEqual(nd.tolist(), expected)
+
+ def test_ndarray_multidim(self):
+     # Verify C and Fortran arrays with 0 to 4 dimensions of random
+     # extent, for every permutation of the shape.
+     for ndim in range(5):
+         shape_t = [randrange(2, 10) for _ in range(ndim)]
+         nitems = prod(shape_t)
+         for shape in permutations(shape_t):
+
+             fmt, items, _ = randitems(nitems)
+             itemsize = struct.calcsize(fmt)
+             # Expected fields shared by all verify() calls below.
+             common = dict(obj=None, itemsize=itemsize, fmt=fmt,
+                           readonly=1, ndim=ndim, shape=shape)
+
+             for flags in (0, ND_PIL):
+                 if ndim == 0 and flags == ND_PIL:
+                     continue
+
+                 # C array
+                 nd = ndarray(items, shape=shape, format=fmt, flags=flags)
+
+                 c_strides = strides_from_shape(ndim, shape, itemsize, 'C')
+                 c_list = carray(items, shape)
+                 self.verify(nd, strides=c_strides, lst=c_list, **common)
+
+                 if is_memoryview_format(fmt):
+                     # memoryview: reconstruct strides
+                     ex = ndarray(items, shape=shape, format=fmt)
+                     nd = ndarray(ex, getbuf=PyBUF_CONTIG_RO|PyBUF_FORMAT)
+                     self.assertTrue(nd.strides == ())
+                     mv = nd.memoryview_from_buffer()
+                     self.verify(mv, strides=c_strides, lst=c_list,
+                                 **common)
+
+                 # Fortran array
+                 nd = ndarray(items, shape=shape, format=fmt,
+                              flags=flags|ND_FORTRAN)
+
+                 f_strides = strides_from_shape(ndim, shape, itemsize, 'F')
+                 f_list = farray(items, shape)
+                 self.verify(nd, strides=f_strides, lst=f_list, **common)
+
+ def test_ndarray_index_invalid(self):
+     # Check the exception types raised by invalid item access on an
+     # ndarray and on a memoryview of it.
+     raises = self.assertRaises
+
+     # not writable
+     nd = ndarray([1], shape=[1])
+     raises(TypeError, nd.__setitem__, 1, 8)
+     mv = memoryview(nd)
+     self.assertEqual(mv, nd)
+     raises(TypeError, mv.__setitem__, 1, 8)
+
+     # cannot be deleted
+     nd = ndarray([1], shape=[1], flags=ND_WRITABLE)
+     raises(TypeError, nd.__delitem__, 1)
+     mv = memoryview(nd)
+     self.assertEqual(mv, nd)
+     raises(TypeError, mv.__delitem__, 1)
+
+     # overflow
+     huge = 1<<64
+     nd = ndarray([1], shape=[1], flags=ND_WRITABLE)
+     raises(OverflowError, nd.__getitem__, huge)
+     raises(OverflowError, nd.__setitem__, huge, 8)
+     mv = memoryview(nd)
+     self.assertEqual(mv, nd)
+     raises(IndexError, mv.__getitem__, huge)
+     raises(IndexError, mv.__setitem__, huge, 8)
+
+     # format
+     items = [1,2,3,4,5,6,7,8]
+     nd = ndarray(items, shape=[len(items)], format="B", flags=ND_WRITABLE)
+     raises(struct.error, nd.__setitem__, 2, 300)
+     raises(ValueError, nd.__setitem__, 1, (100, 200))
+     mv = memoryview(nd)
+     self.assertEqual(mv, nd)
+     raises(ValueError, mv.__setitem__, 2, 300)
+     raises(TypeError, mv.__setitem__, 1, (100, 200))
+
+     items = [(1,2), (3,4), (5,6)]
+     nd = ndarray(items, shape=[len(items)], format="LQ", flags=ND_WRITABLE)
+     raises(ValueError, nd.__setitem__, 2, 300)
+     raises(struct.error, nd.__setitem__, 1, (b'\x001', 200))
+
+ def test_ndarray_index_scalar(self):
+     # scalar: a zero-dimensional array accepts only () and ... as
+     # subscripts, both on the ndarray and on its memoryview.
+     nd = ndarray(1, shape=(), flags=ND_WRITABLE)
+     mv = memoryview(nd)
+     self.assertEqual(mv, nd)
+
+     for view in (nd, mv):
+         x = view[()]
+         self.assertEqual(x, 1)
+         x = view[...]
+         self.assertEqual(x.tolist(), nd.tolist())
+
+     # Integer subscripts are rejected.
+     raises = self.assertRaises
+     raises(TypeError, nd.__getitem__, 0)
+     raises(TypeError, mv.__getitem__, 0)
+     raises(TypeError, nd.__setitem__, 0, 8)
+     raises(TypeError, mv.__setitem__, 0, 8)
+
+     self.assertEqual(nd.tolist(), 1)
+     self.assertEqual(mv.tolist(), 1)
+
+     # Assign through the ndarray first, then through the memoryview.
+     for key, value in (((), 9), (Ellipsis, 5)):
+         nd[key] = value
+         self.assertEqual(nd.tolist(), value)
+         mv[key] = value
+         self.assertEqual(mv.tolist(), value)
+
+ def test_ndarray_index_null_strides(self):
+     # A consumer that requested PyBUF_CONTIG from a 2x4 exporter
+     # must refuse to create sub-views and slices.
+     exporter = ndarray(list(range(2*4)), shape=[2, 4], flags=ND_WRITABLE)
+     nd = ndarray(exporter, getbuf=PyBUF_CONTIG)
+
+     getitem = nd.__getitem__
+     # Sub-views are only possible for full exporters.
+     self.assertRaises(BufferError, getitem, 1)
+     # Same for slices.
+     self.assertRaises(BufferError, getitem, slice(3,5,1))
+
+ def test_ndarray_index_getitem_single(self):
+     # Read single items with indices in [-5, 5) from five-element
+     # arrays: full exporters, consumers with null strides, with null
+     # format and with null shape/strides/format.
+     def check_items(view, expected):
+         # Every valid index must return the same item as the list.
+         for i in range(-5, 5):
+             self.assertEqual(view[i], expected[i])
+
+     def check_bounds(view):
+         # The first indices outside the valid range raise IndexError.
+         self.assertRaises(IndexError, view.__getitem__, -6)
+         self.assertRaises(IndexError, view.__getitem__, 5)
+
+     # getitem
+     for fmt, items, _ in iter_format(5):
+         nd = ndarray(items, shape=[5], format=fmt)
+         check_items(nd, items)
+         check_bounds(nd)
+
+         if is_memoryview_format(fmt):
+             mv = memoryview(nd)
+             self.assertEqual(mv, nd)
+             check_items(mv, items)
+             check_bounds(mv)
+
+     # getitem with null strides
+     for fmt, items, _ in iter_format(5):
+         ex = ndarray(items, shape=[5], flags=ND_WRITABLE, format=fmt)
+         nd = ndarray(ex, getbuf=PyBUF_CONTIG|PyBUF_FORMAT)
+         check_items(nd, items)
+
+         if is_memoryview_format(fmt):
+             mv = nd.memoryview_from_buffer()
+             self.assertIs(mv.__eq__(nd), NotImplemented)
+             check_items(mv, items)
+
+     # getitem with null format
+     items = [1,2,3,4,5]
+     ex = ndarray(items, shape=[5])
+     nd = ndarray(ex, getbuf=PyBUF_CONTIG_RO)
+     check_items(nd, items)
+
+     # getitem with null shape/strides/format
+     items = [1,2,3,4,5]
+     ex = ndarray(items, shape=[5])
+     nd = ndarray(ex, getbuf=PyBUF_SIMPLE)
+     check_items(nd, items)
+
+ def test_ndarray_index_setitem_single(self):
+     # Assign single items to writable five-element arrays and to
+     # memoryviews of them, mirroring each assignment in the list.
+     raises = self.assertRaises
+
+     # assign single value
+     for fmt, items, single_item in iter_format(5):
+         nd = ndarray(items, shape=[5], format=fmt, flags=ND_WRITABLE)
+         for pos in range(5):
+             items[pos] = single_item
+             nd[pos] = single_item
+         self.assertEqual(nd.tolist(), items)
+
+         raises(IndexError, nd.__setitem__, -6, single_item)
+         raises(IndexError, nd.__setitem__, 5, single_item)
+
+         if is_memoryview_format(fmt):
+             nd = ndarray(items, shape=[5], format=fmt, flags=ND_WRITABLE)
+             mv = memoryview(nd)
+             self.assertEqual(mv, nd)
+             for pos in range(5):
+                 items[pos] = single_item
+                 mv[pos] = single_item
+             self.assertEqual(mv.tolist(), items)
+
+             raises(IndexError, mv.__setitem__, -6, single_item)
+             raises(IndexError, mv.__setitem__, 5, single_item)
+
+
+     # assign single value: lobject = robject
+     for fmt, items, single_item in iter_format(5):
+         nd = ndarray(items, shape=[5], format=fmt, flags=ND_WRITABLE)
+         for pos in range(-5, 4):
+             items[pos] = items[pos+1]
+             nd[pos] = nd[pos+1]
+         self.assertEqual(nd.tolist(), items)
+
+         if is_memoryview_format(fmt):
+             nd = ndarray(items, shape=[5], format=fmt, flags=ND_WRITABLE)
+             mv = memoryview(nd)
+             self.assertEqual(mv, nd)
+             for pos in range(-5, 4):
+                 items[pos] = items[pos+1]
+                 mv[pos] = mv[pos+1]
+             self.assertEqual(mv.tolist(), items)
+
+ def test_ndarray_index_getitem_multidim(self):
+     # Index three-dimensional C and Fortran arrays one dimension at a
+     # time, with positive and negative indices, and compare every
+     # sub-view and every element with the nested list representation.
+     def check_all_indices(nd, lst, shape):
+         for i in range(-shape[0], shape[0]):
+             self.assertEqual(lst[i], nd[i].tolist())
+             for j in range(-shape[1], shape[1]):
+                 self.assertEqual(lst[i][j], nd[i][j].tolist())
+                 # The Fortran branch used to iterate over
+                 # range(shape[2], shape[2]), which is empty, so the
+                 # element comparison never ran.  Both layouts now
+                 # share the full range.
+                 for k in range(-shape[2], shape[2]):
+                     self.assertEqual(lst[i][j][k], nd[i][j][k])
+
+     shape_t = (2, 3, 5)
+     nitems = prod(shape_t)
+     for shape in permutations(shape_t):
+
+         fmt, items, _ = randitems(nitems)
+
+         for flags in (0, ND_PIL):
+             # C array
+             nd = ndarray(items, shape=shape, format=fmt, flags=flags)
+             check_all_indices(nd, carray(items, shape), shape)
+
+             # Fortran array
+             nd = ndarray(items, shape=shape, format=fmt,
+                          flags=flags|ND_FORTRAN)
+             check_all_indices(nd, farray(items, shape), shape)
+
+ def test_ndarray_sequence(self):
+     # Membership tests on scalars raise TypeError; iteration and
+     # membership on one-dimensional arrays agree with the item list.
+
+     # The names 'nd' and 'mv' are looked up by eval() via locals().
+     nd = ndarray(1, shape=())
+     self.assertRaises(TypeError, eval, "1 in nd", locals())
+     mv = memoryview(nd)
+     self.assertEqual(mv, nd)
+     self.assertRaises(TypeError, eval, "1 in mv", locals())
+
+     for fmt, items, _ in iter_format(5):
+         nd = ndarray(items, shape=[5], format=fmt)
+         views = [nd]
+         if is_memoryview_format(fmt):
+             mv = memoryview(nd)
+             views.append(mv)
+
+         for view in views:
+             for pos, value in enumerate(view):
+                 self.assertEqual(value, items[pos])
+                 self.assertTrue(value in view)
+
+ def test_ndarray_slice_invalid(self):
+     # Check the exception types raised by invalid slice operations on
+     # ndarrays and memoryviews, and verify that a failed slice
+     # assignment leaves the target unchanged.
+     items = [1,2,3,4,5,6,7,8]
+
+     # rvalue is not an exporter
+     xl = ndarray(items, shape=[8], flags=ND_WRITABLE)
+     ml = memoryview(xl)
+     self.assertRaises(TypeError, xl.__setitem__, slice(0,8,1), items)
+     self.assertRaises(TypeError, ml.__setitem__, slice(0,8,1), items)
+
+     # rvalue is not a full exporter
+     xl = ndarray(items, shape=[8], flags=ND_WRITABLE)
+     ex = ndarray(items, shape=[8], flags=ND_WRITABLE)
+     xr = ndarray(ex, getbuf=PyBUF_ND)
+     self.assertRaises(BufferError, xl.__setitem__, slice(0,8,1), xr)
+
+     # zero step
+     nd = ndarray(items, shape=[8], format="L", flags=ND_WRITABLE)
+     mv = memoryview(nd)
+     self.assertRaises(ValueError, nd.__getitem__, slice(0,1,0))
+     self.assertRaises(ValueError, mv.__getitem__, slice(0,1,0))
+
+     nd = ndarray(items, shape=[2,4], format="L", flags=ND_WRITABLE)
+     mv = memoryview(nd)
+
+     self.assertRaises(ValueError, nd.__getitem__,
+                       (slice(0,1,1), slice(0,1,0)))
+     self.assertRaises(ValueError, nd.__getitem__,
+                       (slice(0,1,0), slice(0,1,1)))
+     self.assertRaises(TypeError, nd.__getitem__, "@%$")
+     self.assertRaises(TypeError, nd.__getitem__, ("@%$", slice(0,1,1)))
+     self.assertRaises(TypeError, nd.__getitem__, (slice(0,1,1), {}))
+
+     # memoryview: not implemented
+     self.assertRaises(NotImplementedError, mv.__getitem__,
+                       (slice(0,1,1), slice(0,1,0)))
+     self.assertRaises(TypeError, mv.__getitem__, "@%$")
+
+     # differing format
+     xl = ndarray(items, shape=[8], format="B", flags=ND_WRITABLE)
+     xr = ndarray(items, shape=[8], format="b")
+     ml = memoryview(xl)
+     mr = memoryview(xr)
+     self.assertRaises(ValueError, xl.__setitem__, slice(0,1,1), xr[7:8])
+     self.assertEqual(xl.tolist(), items)
+     self.assertRaises(ValueError, ml.__setitem__, slice(0,1,1), mr[7:8])
+     self.assertEqual(ml.tolist(), items)
+
+     # differing itemsize
+     xl = ndarray(items, shape=[8], format="B", flags=ND_WRITABLE)
+     # The "L" array was previously bound to 'yr' and never used, so
+     # this section silently re-tested the "b" array from the section
+     # above.  Bind it to 'xr' so the rvalue really is format "L".
+     xr = ndarray(items, shape=[8], format="L")
+     ml = memoryview(xl)
+     mr = memoryview(xr)
+     self.assertRaises(ValueError, xl.__setitem__, slice(0,1,1), xr[7:8])
+     self.assertEqual(xl.tolist(), items)
+     self.assertRaises(ValueError, ml.__setitem__, slice(0,1,1), mr[7:8])
+     self.assertEqual(ml.tolist(), items)
+
+     # differing ndim
+     xl = ndarray(items, shape=[2, 4], format="b", flags=ND_WRITABLE)
+     xr = ndarray(items, shape=[8], format="b")
+     ml = memoryview(xl)
+     mr = memoryview(xr)
+     self.assertRaises(ValueError, xl.__setitem__, slice(0,1,1), xr[7:8])
+     self.assertEqual(xl.tolist(), [[1,2,3,4], [5,6,7,8]])
+     self.assertRaises(NotImplementedError, ml.__setitem__, slice(0,1,1),
+                       mr[7:8])
+
+     # differing shape
+     xl = ndarray(items, shape=[8], format="b", flags=ND_WRITABLE)
+     xr = ndarray(items, shape=[8], format="b")
+     ml = memoryview(xl)
+     mr = memoryview(xr)
+     self.assertRaises(ValueError, xl.__setitem__, slice(0,2,1), xr[7:8])
+     self.assertEqual(xl.tolist(), items)
+     self.assertRaises(ValueError, ml.__setitem__, slice(0,2,1), mr[7:8])
+     self.assertEqual(ml.tolist(), items)
+
+     # _testbuffer.c module functions
+     self.assertRaises(TypeError, slice_indices, slice(0,1,2), {})
+     self.assertRaises(TypeError, slice_indices, "###########", 1)
+     self.assertRaises(ValueError, slice_indices, slice(0,1,0), 4)
+
+     x = ndarray(items, shape=[8], format="b", flags=ND_PIL)
+     self.assertRaises(TypeError, x.add_suboffsets)
+
+     ex = ndarray(items, shape=[8], format="B")
+     x = ndarray(ex, getbuf=PyBUF_SIMPLE)
+     self.assertRaises(TypeError, x.add_suboffsets)
+
+ def test_ndarray_slice_zero_shape(self):
+     # Assigning an empty slice to an empty slice must succeed and
+     # leave the target unchanged.
+     items = list(range(1, 13))
+
+     # One dimension, ndarray.
+     x = ndarray(items, shape=[12], format="L", flags=ND_WRITABLE)
+     y = ndarray(items, shape=[12], format="L")
+     x[4:4] = y[9:9]
+     self.assertEqual(x.tolist(), items)
+
+     # One dimension, memoryview.
+     ml, mr = memoryview(x), memoryview(y)
+     self.assertEqual(ml, x)
+     self.assertEqual(ml, y)
+     ml[4:4] = mr[9:9]
+     self.assertEqual(ml.tolist(), items)
+
+     # Two dimensions, ndarray.
+     x = ndarray(items, shape=[3, 4], format="L", flags=ND_WRITABLE)
+     y = ndarray(items, shape=[4, 3], format="L")
+     x[1:2, 2:2] = y[1:2, 3:3]
+     expected = carray(items, [3, 4])
+     self.assertEqual(x.tolist(), expected)
+
+ def test_ndarray_slice_multidim(self):
+     # Slice three-dimensional arrays and compare the result, or the
+     # class of the raised exception, with slicing of nested lists.
+     def attempt(operation):
+         # Return (result, None) on success, (None, exception class)
+         # if the operation raised.
+         try:
+             return operation(), None
+         except Exception as exc:
+             return None, exc.__class__
+
+     shape_t = (2, 3, 5)
+     ndim = len(shape_t)
+     nitems = prod(shape_t)
+     for shape in permutations(shape_t):
+
+         fmt, items, _ = randitems(nitems)
+         itemsize = struct.calcsize(fmt)
+
+         for flags in (0, ND_PIL):
+             nd = ndarray(items, shape=shape, format=fmt, flags=flags)
+             lst = carray(items, shape)
+
+             for slices in rslices_ndim(ndim, shape):
+                 sliced, listerr = attempt(lambda: multislice(lst, slices))
+                 ndsliced, nderr = attempt(lambda: nd[slices])
+
+                 if nderr or listerr:
+                     self.assertIs(nderr, listerr)
+                 else:
+                     self.assertEqual(ndsliced.tolist(), sliced)
+
+ def test_ndarray_slice_redundant_suboffsets(self):
+     # Slice four-dimensional arrays after add_suboffsets() has been
+     # called on them and compare with slicing of nested lists.
+     def attempt(operation):
+         # Return (result, None) on success, (None, exception class)
+         # if the operation raised.
+         try:
+             return operation(), None
+         except Exception as exc:
+             return None, exc.__class__
+
+     shape_t = (2, 3, 5, 2)
+     ndim = len(shape_t)
+     nitems = prod(shape_t)
+     for shape in permutations(shape_t):
+
+         fmt, items, _ = randitems(nitems)
+         itemsize = struct.calcsize(fmt)
+
+         nd = ndarray(items, shape=shape, format=fmt)
+         nd.add_suboffsets()
+         # A second exporter with suboffsets is wrapped in a memoryview.
+         ex = ndarray(items, shape=shape, format=fmt)
+         ex.add_suboffsets()
+         mv = memoryview(ex)
+         lst = carray(items, shape)
+
+         for slices in rslices_ndim(ndim, shape):
+             sliced, listerr = attempt(lambda: multislice(lst, slices))
+             ndsliced, nderr = attempt(lambda: nd[slices])
+
+             if nderr or listerr:
+                 self.assertIs(nderr, listerr)
+             else:
+                 self.assertEqual(ndsliced.tolist(), sliced)
+
+ def test_ndarray_slice_assign_single(self):
+     # Assign slices of a five-element array to slices of the same
+     # array, for ndarray and memoryview, and compare the outcome with
+     # the same assignment performed on a list copy.
+     for fmt, items, _ in iter_format(5):
+         for lslice in genslices(5):
+             for rslice in genslices(5):
+                 for flags in (0, ND_PIL):
+
+                     writable = flags|ND_WRITABLE
+                     nd = ndarray(items, shape=[5], format=fmt,
+                                  flags=writable)
+                     ex = ndarray(items, shape=[5], format=fmt,
+                                  flags=writable)
+                     mv = memoryview(ex)
+
+                     # Reference result: the assignment on a list copy.
+                     lst = items[:]
+                     lsterr = None
+                     diff_structure = None
+                     try:
+                         lval = lst[lslice]
+                         rval = lst[rslice]
+                         lst[lslice] = lst[rslice]
+                         diff_structure = len(lval) != len(rval)
+                     except Exception as exc:
+                         lsterr = exc.__class__
+
+                     nderr = None
+                     try:
+                         nd[lslice] = nd[rslice]
+                     except Exception as exc:
+                         nderr = exc.__class__
+
+                     if diff_structure: # ndarray cannot change shape
+                         self.assertIs(nderr, ValueError)
+                     else:
+                         self.assertEqual(nd.tolist(), lst)
+                         self.assertIs(nderr, lsterr)
+
+                     if not is_memoryview_format(fmt):
+                         continue
+
+                     mverr = None
+                     try:
+                         mv[lslice] = mv[rslice]
+                     except Exception as exc:
+                         mverr = exc.__class__
+
+                     if diff_structure: # memoryview cannot change shape
+                         self.assertIs(mverr, ValueError)
+                     else:
+                         self.assertEqual(mv.tolist(), lst)
+                         self.assertEqual(mv, nd)
+                         self.assertIs(mverr, lsterr)
+                         self.verify(
+                             mv, obj=ex,
+                             itemsize=nd.itemsize, fmt=fmt, readonly=0,
+                             ndim=nd.ndim, shape=nd.shape,
+                             strides=nd.strides,
+                             lst=nd.tolist(),
+                         )
+
+ def test_ndarray_slice_assign_multidim(self):
+     # Assign random slices of a three-dimensional array to slices of
+     # the same array and compare with multislice_assign() on lists.
+     shape_t = (2, 3, 5)
+     ndim = len(shape_t)
+     nitems = prod(shape_t)
+     for shape in permutations(shape_t):
+
+         fmt, items, _ = randitems(nitems)
+
+         for flags in (0, ND_PIL):
+             for _ in range(ITERATIONS):
+                 lslices, rslices = randslice_from_shape(ndim, shape)
+
+                 nd = ndarray(items, shape=shape, format=fmt,
+                              flags=flags|ND_WRITABLE)
+                 lst = carray(items, shape)
+
+                 # Reference result on nested lists.
+                 try:
+                     result = multislice_assign(lst, lst, lslices, rslices)
+                 except Exception as exc:
+                     listerr = exc.__class__
+                 else:
+                     listerr = None
+
+                 try:
+                     nd[lslices] = nd[rslices]
+                 except Exception as exc:
+                     nderr = exc.__class__
+                 else:
+                     nderr = None
+
+                 if nderr or listerr:
+                     self.assertIs(nderr, listerr)
+                 else:
+                     self.assertEqual(nd.tolist(), result)
+
+ def test_ndarray_random(self):
+     # construction of valid arrays: random valid structures must
+     # produce the same list representation for the plain ndarray, the
+     # ND_PIL variant, their memoryviews and (if available) numpy.
+     for _ in range(ITERATIONS):
+         for fmt in fmtdict['@']:
+             itemsize = struct.calcsize(fmt)
+
+             t = rand_structure(itemsize, True, maxdim=MAXDIM,
+                                maxshape=MAXSHAPE)
+             self.assertTrue(verify_structure(*t))
+             items = randitems_from_structure(fmt, t)
+             mv_format = is_memoryview_format(fmt)
+
+             x = ndarray_from_structure(items, fmt, t)
+             xlist = x.tolist()
+
+             mv = memoryview(x)
+             if mv_format:
+                 self.assertEqual(mv.tolist(), xlist)
+
+             if t[2] > 0:
+                 # ndim > 0: test against suboffsets representation.
+                 y = ndarray_from_structure(items, fmt, t, flags=ND_PIL)
+                 ylist = y.tolist()
+                 self.assertEqual(xlist, ylist)
+
+                 mv = memoryview(y)
+                 if mv_format:
+                     self.assertEqual(mv, y)
+                     self.assertEqual(mv.tolist(), ylist)
+
+             # Shapes containing 0 are skipped for numpy, see
+             # http://projects.scipy.org/numpy/ticket/1910
+             if numpy_array and 0 not in t[3]:
+                 z = numpy_array_from_structure(items, fmt, t)
+                 self.verify(x, obj=None,
+                             itemsize=z.itemsize, fmt=fmt, readonly=0,
+                             ndim=z.ndim, shape=z.shape, strides=z.strides,
+                             lst=z.tolist())
+
+ def test_ndarray_random_invalid(self):
+ # exceptions during construction of invalid arrays
+ for _ in range(ITERATIONS):
+ for fmt in fmtdict['@']:
+ itemsize = struct.calcsize(fmt)
+
+ t = rand_structure(itemsize, False, maxdim=MAXDIM,
+ maxshape=MAXSHAPE)
+ self.assertFalse(verify_structure(*t))
+ items = randitems_from_structure(fmt, t)
+
+ nderr = False
+ try:
+ x = ndarray_from_structure(items, fmt, t)
+ except Exception as e:
+ nderr = e.__class__
+ self.assertTrue(nderr)
+
+ if numpy_array:
+ numpy_err = False
+ try:
+ y = numpy_array_from_structure(items, fmt, t)
+ except Exception as e:
+ numpy_err = e.__class__
+
+ if 0: # http://projects.scipy.org/numpy/ticket/1910
+ self.assertTrue(numpy_err)
+
+ def test_ndarray_random_slice_assign(self):
+ # valid slice assignments
+ # For every format in fmtdict['@'], two random valid structures are
+ # built around aligned slices, xr[rslices] is assigned to xl[lslices],
+ # and the result is compared with memoryview, with the ND_PIL variant
+ # and, if numpy_array is available, with NumPy.
+ for _ in range(ITERATIONS):
+ for fmt in fmtdict['@']:
+ itemsize = struct.calcsize(fmt)
+
+ lshape, rshape, lslices, rslices = \
+ rand_aligned_slices(maxdim=MAXDIM, maxshape=MAXSHAPE)
+ tl = rand_structure(itemsize, True, shape=lshape)
+ tr = rand_structure(itemsize, True, shape=rshape)
+ self.assertTrue(verify_structure(*tl))
+ self.assertTrue(verify_structure(*tr))
+ litems = randitems_from_structure(fmt, tl)
+ ritems = randitems_from_structure(fmt, tr)
+
+ xl = ndarray_from_structure(litems, fmt, tl)
+ xr = ndarray_from_structure(ritems, fmt, tr)
+ xl[lslices] = xr[rslices]
+ xllist = xl.tolist()
+ xrlist = xr.tolist()
+
+ ml = memoryview(xl)
+ mr = memoryview(xr)
+ self.assertEqual(ml.tolist(), xllist)
+ self.assertEqual(mr.tolist(), xrlist)
+
+ if tl[2] > 0 and tr[2] > 0:
+ # ndim > 0: test against suboffsets representation.
+ yl = ndarray_from_structure(litems, fmt, tl, flags=ND_PIL)
+ yr = ndarray_from_structure(ritems, fmt, tr, flags=ND_PIL)
+ yl[lslices] = yr[rslices]
+ yllist = yl.tolist()
+ yrlist = yr.tolist()
+ self.assertEqual(xllist, yllist)
+ self.assertEqual(xrlist, yrlist)
+
+ ml = memoryview(yl)
+ mr = memoryview(yr)
+ self.assertEqual(ml.tolist(), yllist)
+ self.assertEqual(mr.tolist(), yrlist)
+
+ if numpy_array:
+ # Shapes containing zeros are skipped for the NumPy comparison.
+ if 0 in lshape or 0 in rshape:
+ continue # http://projects.scipy.org/numpy/ticket/1910
+
+ zl = numpy_array_from_structure(litems, fmt, tl)
+ zr = numpy_array_from_structure(ritems, fmt, tr)
+ zl[lslices] = zr[rslices]
+
+ if not is_overlapping(tl) and not is_overlapping(tr):
+ # Slice assignment of overlapping structures
+ # is undefined in NumPy.
+ self.verify(xl, obj=None,
+ itemsize=zl.itemsize, fmt=fmt, readonly=0,
+ ndim=zl.ndim, shape=zl.shape,
+ strides=zl.strides, lst=zl.tolist())
+
+ self.verify(xr, obj=None,
+ itemsize=zr.itemsize, fmt=fmt, readonly=0,
+ ndim=zr.ndim, shape=zr.shape,
+ strides=zr.strides, lst=zr.tolist())
+
+ def test_ndarray_re_export(self):
+ items = [1,2,3,4,5,6,7,8,9,10,11,12]
+
+ nd = ndarray(items, shape=[3,4], flags=ND_PIL)
+ ex = ndarray(nd)
+
+ self.assertTrue(ex.flags & ND_PIL)
+ self.assertIs(ex.obj, nd)
+ self.assertEqual(ex.suboffsets, (0, -1))
+ self.assertFalse(ex.c_contiguous)
+ self.assertFalse(ex.f_contiguous)
+ self.assertFalse(ex.contiguous)
+
+ def test_ndarray_zero_shape(self):
+ # zeros in shape
+ for flags in (0, ND_PIL):
+ nd = ndarray([1,2,3], shape=[0], flags=flags)
+ mv = memoryview(nd)
+ self.assertEqual(mv, nd)
+ self.assertEqual(nd.tolist(), [])
+ self.assertEqual(mv.tolist(), [])
+
+ nd = ndarray([1,2,3], shape=[0,3,3], flags=flags)
+ self.assertEqual(nd.tolist(), [])
+
+ nd = ndarray([1,2,3], shape=[3,0,3], flags=flags)
+ self.assertEqual(nd.tolist(), [[], [], []])
+
+ nd = ndarray([1,2,3], shape=[3,3,0], flags=flags)
+ self.assertEqual(nd.tolist(),
+ [[[], [], []], [[], [], []], [[], [], []]])
+
+ def test_ndarray_zero_strides(self):
+ # zero strides
+ for flags in (0, ND_PIL):
+ nd = ndarray([1], shape=[5], strides=[0], flags=flags)
+ mv = memoryview(nd)
+ self.assertEqual(mv, nd)
+ self.assertEqual(nd.tolist(), [1, 1, 1, 1, 1])
+ self.assertEqual(mv.tolist(), [1, 1, 1, 1, 1])
+
+ def test_ndarray_offset(self):
+ nd = ndarray(list(range(20)), shape=[3], offset=7)
+ self.assertEqual(nd.offset, 7)
+ self.assertEqual(nd.tolist(), [7,8,9])
+
+ def test_ndarray_memoryview_from_buffer(self):
+ for flags in (0, ND_PIL):
+ nd = ndarray(list(range(3)), shape=[3], flags=flags)
+ m = nd.memoryview_from_buffer()
+ self.assertEqual(m, nd)
+
+ def test_ndarray_get_pointer(self):
+ for flags in (0, ND_PIL):
+ nd = ndarray(list(range(3)), shape=[3], flags=flags)
+ for i in range(3):
+ self.assertEqual(nd[i], get_pointer(nd, [i]))
+
+ def test_ndarray_tolist_null_strides(self):
+ ex = ndarray(list(range(20)), shape=[2,2,5])
+
+ nd = ndarray(ex, getbuf=PyBUF_ND|PyBUF_FORMAT)
+ self.assertEqual(nd.tolist(), ex.tolist())
+
+ m = memoryview(ex)
+ self.assertEqual(m.tolist(), ex.tolist())
+
+ def test_ndarray_cmp_contig(self):
+
+ self.assertFalse(cmp_contig(b"123", b"456"))
+
+ x = ndarray(list(range(12)), shape=[3,4])
+ y = ndarray(list(range(12)), shape=[4,3])
+ self.assertFalse(cmp_contig(x, y))
+
+ x = ndarray([1], shape=[1], format="B")
+ self.assertTrue(cmp_contig(x, b'\x01'))
+ self.assertTrue(cmp_contig(b'\x01', x))
+
+ def test_ndarray_hash(self):
+
+ a = array.array('L', [1,2,3])
+ nd = ndarray(a)
+ self.assertRaises(ValueError, hash, nd)
+
+ # one-dimensional
+ b = bytes(list(range(12)))
+
+ nd = ndarray(list(range(12)), shape=[12])
+ self.assertEqual(hash(nd), hash(b))
+
+ # C-contiguous
+ nd = ndarray(list(range(12)), shape=[3,4])
+ self.assertEqual(hash(nd), hash(b))
+
+ nd = ndarray(list(range(12)), shape=[3,2,2])
+ self.assertEqual(hash(nd), hash(b))
+
+ # Fortran contiguous
+ b = bytes(transpose(list(range(12)), shape=[4,3]))
+ nd = ndarray(list(range(12)), shape=[3,4], flags=ND_FORTRAN)
+ self.assertEqual(hash(nd), hash(b))
+
+ b = bytes(transpose(list(range(12)), shape=[2,3,2]))
+ nd = ndarray(list(range(12)), shape=[2,3,2], flags=ND_FORTRAN)
+ self.assertEqual(hash(nd), hash(b))
+
+ # suboffsets
+ b = bytes(list(range(12)))
+ nd = ndarray(list(range(12)), shape=[2,2,3], flags=ND_PIL)
+ self.assertEqual(hash(nd), hash(b))
+
+ # non-byte formats
+ nd = ndarray(list(range(12)), shape=[2,2,3], format='L')
+ self.assertEqual(hash(nd), hash(nd.tobytes()))
+
+ def test_py_buffer_to_contiguous(self):
+ # Exercises py_buffer_to_contiguous() with scalar, zero-sized,
+ # one-dimensional and two-dimensional inputs, for the orders 'C', 'F'
+ # and 'A' and for the buffer request flags listed in `requests`.
+
+ # The requests are used in _testbuffer.c:py_buffer_to_contiguous
+ # to generate buffers without full information for testing.
+ requests = (
+ # distinct flags
+ PyBUF_INDIRECT, PyBUF_STRIDES, PyBUF_ND, PyBUF_SIMPLE,
+ # compound requests
+ PyBUF_FULL, PyBUF_FULL_RO,
+ PyBUF_RECORDS, PyBUF_RECORDS_RO,
+ PyBUF_STRIDED, PyBUF_STRIDED_RO,
+ PyBUF_CONTIG, PyBUF_CONTIG_RO,
+ )
+
+ # no buffer interface
+ self.assertRaises(TypeError, py_buffer_to_contiguous, {}, 'F',
+ PyBUF_FULL_RO)
+
+ # scalar, read-only request
+ nd = ndarray(9, shape=(), format="L", flags=ND_WRITABLE)
+ for order in ['C', 'F', 'A']:
+ for request in requests:
+ b = py_buffer_to_contiguous(nd, order, request)
+ self.assertEqual(b, nd.tobytes())
+
+ # zeros in shape
+ nd = ndarray([1], shape=[0], format="L", flags=ND_WRITABLE)
+ for order in ['C', 'F', 'A']:
+ for request in requests:
+ b = py_buffer_to_contiguous(nd, order, request)
+ self.assertEqual(b, b'')
+
+ nd = ndarray(list(range(8)), shape=[2, 0, 7], format="L",
+ flags=ND_WRITABLE)
+ for order in ['C', 'F', 'A']:
+ for request in requests:
+ b = py_buffer_to_contiguous(nd, order, request)
+ self.assertEqual(b, b'')
+
+ ### One-dimensional arrays are trivial, since Fortran and C order
+ ### are the same.
+
+ # one-dimensional
+ for f in [0, ND_FORTRAN]:
+ nd = ndarray([1], shape=[1], format="h", flags=f|ND_WRITABLE)
+ ndbytes = nd.tobytes()
+ for order in ['C', 'F', 'A']:
+ for request in requests:
+ b = py_buffer_to_contiguous(nd, order, request)
+ self.assertEqual(b, ndbytes)
+
+ nd = ndarray([1, 2, 3], shape=[3], format="b", flags=f|ND_WRITABLE)
+ ndbytes = nd.tobytes()
+ for order in ['C', 'F', 'A']:
+ for request in requests:
+ b = py_buffer_to_contiguous(nd, order, request)
+ self.assertEqual(b, ndbytes)
+
+ # one-dimensional, non-contiguous input
+ nd = ndarray([1, 2, 3], shape=[2], strides=[2], flags=ND_WRITABLE)
+ ndbytes = nd.tobytes()
+ for order in ['C', 'F', 'A']:
+ for request in [PyBUF_STRIDES, PyBUF_FULL]:
+ b = py_buffer_to_contiguous(nd, order, request)
+ self.assertEqual(b, ndbytes)
+
+ nd = nd[::-1]
+ ndbytes = nd.tobytes()
+ for order in ['C', 'F', 'A']:
+ for request in requests:
+ # Requests that raise BufferError for this input are skipped.
+ try:
+ b = py_buffer_to_contiguous(nd, order, request)
+ except BufferError:
+ continue
+ self.assertEqual(b, ndbytes)
+
+ ###
+ ### Multi-dimensional arrays:
+ ###
+ ### The goal here is to preserve the logical representation of the
+ ### input array but change the physical representation if necessary.
+ ###
+ ### _testbuffer example:
+ ### ====================
+ ###
+ ### C input array:
+ ### --------------
+ ### >>> nd = ndarray(list(range(12)), shape=[3, 4])
+ ### >>> nd.tolist()
+ ### [[0, 1, 2, 3],
+ ### [4, 5, 6, 7],
+ ### [8, 9, 10, 11]]
+ ###
+ ### Fortran output:
+ ### ---------------
+ ### >>> py_buffer_to_contiguous(nd, 'F', PyBUF_FULL_RO)
+ ### b'\x00\x04\x08\x01\x05\t\x02\x06\n\x03\x07\x0b'
+ ###
+ ### The return value corresponds to this input list for
+ ### _testbuffer's ndarray:
+ ### >>> nd = ndarray([0,4,8,1,5,9,2,6,10,3,7,11], shape=[3,4],
+ ### flags=ND_FORTRAN)
+ ### >>> nd.tolist()
+ ### [[0, 1, 2, 3],
+ ### [4, 5, 6, 7],
+ ### [8, 9, 10, 11]]
+ ###
+ ### The logical array is the same, but the values in memory are now
+ ### in Fortran order.
+ ###
+ ### NumPy example:
+ ### ==============
+ ### _testbuffer's ndarray takes lists to initialize the memory.
+ ### Here's the same sequence in NumPy:
+ ###
+ ### C input:
+ ### --------
+ ### >>> nd = ndarray(buffer=bytearray(list(range(12))),
+ ### shape=[3, 4], dtype='B')
+ ### >>> nd
+ ### array([[ 0, 1, 2, 3],
+ ### [ 4, 5, 6, 7],
+ ### [ 8, 9, 10, 11]], dtype=uint8)
+ ###
+ ### Fortran output:
+ ### ---------------
+ ### >>> fortran_buf = nd.tostring(order='F')
+ ### >>> fortran_buf
+ ### b'\x00\x04\x08\x01\x05\t\x02\x06\n\x03\x07\x0b'
+ ###
+ ### >>> nd = ndarray(buffer=fortran_buf, shape=[3, 4],
+ ### dtype='B', order='F')
+ ###
+ ### >>> nd
+ ### array([[ 0, 1, 2, 3],
+ ### [ 4, 5, 6, 7],
+ ### [ 8, 9, 10, 11]], dtype=uint8)
+ ###
+
+ # multi-dimensional, contiguous input
+ lst = list(range(12))
+ for f in [0, ND_FORTRAN]:
+ nd = ndarray(lst, shape=[3, 4], flags=f|ND_WRITABLE)
+ # NOTE(review): the tostring() calls on `na` below are believed to be
+ # deprecated in recent NumPy releases in favour of tobytes() -- confirm
+ # against the NumPy version in use.
+ if numpy_array:
+ na = numpy_array(buffer=bytearray(lst),
+ shape=[3, 4], dtype='B',
+ order='C' if f == 0 else 'F')
+
+ # 'C' request
+ if f == ND_FORTRAN: # 'F' to 'C'
+ x = ndarray(transpose(lst, [4, 3]), shape=[3, 4],
+ flags=ND_WRITABLE)
+ expected = x.tobytes()
+ else:
+ expected = nd.tobytes()
+ for request in requests:
+ try:
+ b = py_buffer_to_contiguous(nd, 'C', request)
+ except BufferError:
+ continue
+
+ self.assertEqual(b, expected)
+
+ # Check that output can be used as the basis for constructing
+ # a C array that is logically identical to the input array.
+ y = ndarray([v for v in b], shape=[3, 4], flags=ND_WRITABLE)
+ self.assertEqual(memoryview(y), memoryview(nd))
+
+ if numpy_array:
+ self.assertEqual(b, na.tostring(order='C'))
+
+ # 'F' request
+ if f == 0: # 'C' to 'F'
+ x = ndarray(transpose(lst, [3, 4]), shape=[4, 3],
+ flags=ND_WRITABLE)
+ else:
+ x = ndarray(lst, shape=[3, 4], flags=ND_WRITABLE)
+ expected = x.tobytes()
+ for request in [PyBUF_FULL, PyBUF_FULL_RO, PyBUF_INDIRECT,
+ PyBUF_STRIDES, PyBUF_ND]:
+ try:
+ b = py_buffer_to_contiguous(nd, 'F', request)
+ except BufferError:
+ continue
+ self.assertEqual(b, expected)
+
+ # Check that output can be used as the basis for constructing
+ # a Fortran array that is logically identical to the input array.
+ y = ndarray([v for v in b], shape=[3, 4], flags=ND_FORTRAN|ND_WRITABLE)
+ self.assertEqual(memoryview(y), memoryview(nd))
+
+ if numpy_array:
+ self.assertEqual(b, na.tostring(order='F'))
+
+ # 'A' request
+ if f == ND_FORTRAN:
+ x = ndarray(lst, shape=[3, 4], flags=ND_WRITABLE)
+ expected = x.tobytes()
+ else:
+ expected = nd.tobytes()
+ for request in [PyBUF_FULL, PyBUF_FULL_RO, PyBUF_INDIRECT,
+ PyBUF_STRIDES, PyBUF_ND]:
+ try:
+ b = py_buffer_to_contiguous(nd, 'A', request)
+ except BufferError:
+ continue
+
+ self.assertEqual(b, expected)
+
+ # Check that output can be used as the basis for constructing
+ # an array with order=f that is logically identical to the input
+ # array.
+ y = ndarray([v for v in b], shape=[3, 4], flags=f|ND_WRITABLE)
+ self.assertEqual(memoryview(y), memoryview(nd))
+
+ if numpy_array:
+ self.assertEqual(b, na.tostring(order='A'))
+
+ # multi-dimensional, non-contiguous input
+ nd = ndarray(list(range(12)), shape=[3, 4], flags=ND_WRITABLE|ND_PIL)
+
+ # 'C'
+ b = py_buffer_to_contiguous(nd, 'C', PyBUF_FULL_RO)
+ self.assertEqual(b, nd.tobytes())
+ y = ndarray([v for v in b], shape=[3, 4], flags=ND_WRITABLE)
+ self.assertEqual(memoryview(y), memoryview(nd))
+
+ # 'F'
+ # `lst` is still list(range(12)) from the contiguous section above.
+ b = py_buffer_to_contiguous(nd, 'F', PyBUF_FULL_RO)
+ x = ndarray(transpose(lst, [3, 4]), shape=[4, 3], flags=ND_WRITABLE)
+ self.assertEqual(b, x.tobytes())
+ y = ndarray([v for v in b], shape=[3, 4], flags=ND_FORTRAN|ND_WRITABLE)
+ self.assertEqual(memoryview(y), memoryview(nd))
+
+ # 'A'
+ b = py_buffer_to_contiguous(nd, 'A', PyBUF_FULL_RO)
+ self.assertEqual(b, nd.tobytes())
+ y = ndarray([v for v in b], shape=[3, 4], flags=ND_WRITABLE)
+ self.assertEqual(memoryview(y), memoryview(nd))
+
+ def test_memoryview_construction(self):
+ # Builds memoryviews from C-contiguous, Fortran-contiguous and
+ # ND_PIL exporters (and from other memoryviews), checks them with
+ # self.verify(), then checks the error cases at the end.
+
+ # (items, shape) pairs: a scalar, a 1-D array and a 3-D array.
+ items_shape = [(9, []), ([1,2,3], [3]), (list(range(2*3*5)), [2,3,5])]
+
+ # NumPy style, C-contiguous:
+ for items, shape in items_shape:
+
+ # From PEP-3118 compliant exporter:
+ ex = ndarray(items, shape=shape)
+ m = memoryview(ex)
+ self.assertTrue(m.c_contiguous)
+ self.assertTrue(m.contiguous)
+
+ ndim = len(shape)
+ strides = strides_from_shape(ndim, shape, 1, 'C')
+ lst = carray(items, shape)
+
+ self.verify(m, obj=ex,
+ itemsize=1, fmt='B', readonly=1,
+ ndim=ndim, shape=shape, strides=strides,
+ lst=lst)
+
+ # From memoryview:
+ m2 = memoryview(m)
+ self.verify(m2, obj=ex,
+ itemsize=1, fmt='B', readonly=1,
+ ndim=ndim, shape=shape, strides=strides,
+ lst=lst)
+
+ # PyMemoryView_FromBuffer(): no strides
+ nd = ndarray(ex, getbuf=PyBUF_CONTIG_RO|PyBUF_FORMAT)
+ self.assertEqual(nd.strides, ())
+ m = nd.memoryview_from_buffer()
+ self.verify(m, obj=None,
+ itemsize=1, fmt='B', readonly=1,
+ ndim=ndim, shape=shape, strides=strides,
+ lst=lst)
+
+ # PyMemoryView_FromBuffer(): no format, shape, strides
+ nd = ndarray(ex, getbuf=PyBUF_SIMPLE)
+ self.assertEqual(nd.format, '')
+ self.assertEqual(nd.shape, ())
+ self.assertEqual(nd.strides, ())
+ m = nd.memoryview_from_buffer()
+
+ # The view is verified as one-dimensional with ex.nbytes items, so
+ # the expected list is flat; a scalar is wrapped in a list.
+ lst = [items] if ndim == 0 else items
+ self.verify(m, obj=None,
+ itemsize=1, fmt='B', readonly=1,
+ ndim=1, shape=[ex.nbytes], strides=(1,),
+ lst=lst)
+
+ # NumPy style, Fortran contiguous:
+ for items, shape in items_shape:
+
+ # From PEP-3118 compliant exporter:
+ ex = ndarray(items, shape=shape, flags=ND_FORTRAN)
+ m = memoryview(ex)
+ self.assertTrue(m.f_contiguous)
+ self.assertTrue(m.contiguous)
+
+ ndim = len(shape)
+ strides = strides_from_shape(ndim, shape, 1, 'F')
+ lst = farray(items, shape)
+
+ self.verify(m, obj=ex,
+ itemsize=1, fmt='B', readonly=1,
+ ndim=ndim, shape=shape, strides=strides,
+ lst=lst)
+
+ # From memoryview:
+ m2 = memoryview(m)
+ self.verify(m2, obj=ex,
+ itemsize=1, fmt='B', readonly=1,
+ ndim=ndim, shape=shape, strides=strides,
+ lst=lst)
+
+ # PIL style:
+ # The scalar entry of items_shape is left out here.
+ for items, shape in items_shape[1:]:
+
+ # From PEP-3118 compliant exporter:
+ ex = ndarray(items, shape=shape, flags=ND_PIL)
+ m = memoryview(ex)
+
+ ndim = len(shape)
+ lst = carray(items, shape)
+
+ self.verify(m, obj=ex,
+ itemsize=1, fmt='B', readonly=1,
+ ndim=ndim, shape=shape, strides=ex.strides,
+ lst=lst)
+
+ # From memoryview:
+ m2 = memoryview(m)
+ self.verify(m2, obj=ex,
+ itemsize=1, fmt='B', readonly=1,
+ ndim=ndim, shape=shape, strides=ex.strides,
+ lst=lst)
+
+ # Invalid number of arguments:
+ self.assertRaises(TypeError, memoryview, b'9', 'x')
+ # Not a buffer provider:
+ self.assertRaises(TypeError, memoryview, {})
+ # Non-compliant buffer provider:
+ ex = ndarray([1,2,3], shape=[3])
+ nd = ndarray(ex, getbuf=PyBUF_SIMPLE)
+ self.assertRaises(BufferError, memoryview, nd)
+ nd = ndarray(ex, getbuf=PyBUF_CONTIG_RO|PyBUF_FORMAT)
+ self.assertRaises(BufferError, memoryview, nd)
+
+ # ndim > 64
+ nd = ndarray([1]*128, shape=[1]*128, format='L')
+ self.assertRaises(ValueError, memoryview, nd)
+ self.assertRaises(ValueError, nd.memoryview_from_buffer)
+ self.assertRaises(ValueError, get_contiguous, nd, PyBUF_READ, 'C')
+ self.assertRaises(ValueError, get_contiguous, nd, PyBUF_READ, 'F')
+ self.assertRaises(ValueError, get_contiguous, nd[::-1], PyBUF_READ, 'C')
+
+ def test_memoryview_cast_zero_shape(self):
+ # Casts are undefined if shape contains zeros. These arrays are
+ # regarded as C-contiguous by Numpy and PyBuffer_GetContiguous(),
+ # so they are not caught by the test for C-contiguity in memory_cast().
+ items = [1,2,3]
+ for shape in ([0,3,3], [3,0,3], [0,3,3]):
+ ex = ndarray(items, shape=shape)
+ self.assertTrue(ex.c_contiguous)
+ msrc = memoryview(ex)
+ self.assertRaises(TypeError, msrc.cast, 'c')
+
+ def test_memoryview_struct_module(self):
+ # Compares item assignment through a memoryview with
+ # struct.pack_into() on an identically built ndarray: a value is
+ # expected to be rejected by both or accepted by both with equal
+ # results.
+
+ # Wrapper that only provides __int__.
+ class INT(object):
+ def __init__(self, val):
+ self.val = val
+ def __int__(self):
+ return self.val
+
+ # Wrapper that only provides __index__.
+ class IDX(object):
+ def __init__(self, val):
+ self.val = val
+ def __index__(self):
+ return self.val
+
+ def f(): return 7
+
+ # Candidate values of many different types to assign.
+ values = [INT(9), IDX(9),
+ 2.2+3j, Decimal("-21.1"), 12.2, Fraction(5, 2),
+ [1,2,3], {4,5,6}, {7:8}, (), (9,),
+ True, False, None, NotImplemented,
+ b'a', b'abc', bytearray(b'a'), bytearray(b'abc'),
+ 'a', 'abc', r'a', r'abc',
+ f, lambda x: x]
+
+ for fmt, items, item in iter_format(10, 'memoryview'):
+ ex = ndarray(items, shape=[10], format=fmt, flags=ND_WRITABLE)
+ nd = ndarray(items, shape=[10], format=fmt, flags=ND_WRITABLE)
+ m = memoryview(ex)
+
+ # A known-valid item goes into slot 0 through both paths.
+ struct.pack_into(fmt, nd, 0, item)
+ m[0] = item
+ self.assertEqual(m[0], nd[0])
+
+ itemsize = struct.calcsize(fmt)
+ # Formats containing 'P' are not compared value by value.
+ if 'P' in fmt:
+ continue
+
+ for v in values:
+ # Offset `itemsize` in nd corresponds to index 1 in m.
+ struct_err = None
+ try:
+ struct.pack_into(fmt, nd, itemsize, v)
+ except struct.error:
+ struct_err = struct.error
+
+ mv_err = None
+ try:
+ m[1] = v
+ except (TypeError, ValueError) as e:
+ mv_err = e.__class__
+
+ # If either side failed, both must have failed.
+ if struct_err or mv_err:
+ self.assertIsNot(struct_err, None)
+ self.assertIsNot(mv_err, None)
+ else:
+ self.assertEqual(m[1], nd[1])
+
+ def test_memoryview_cast_zero_strides(self):
+ # Casts are undefined if strides contains zeros. These arrays are
+ # (sometimes!) regarded as C-contiguous by Numpy, but not by
+ # PyBuffer_GetContiguous().
+ ex = ndarray([1,2,3], shape=[3], strides=[0])
+ self.assertFalse(ex.c_contiguous)
+ msrc = memoryview(ex)
+ self.assertRaises(TypeError, msrc.cast, 'c')
+
+ def test_memoryview_cast_invalid(self):
+ # Collects the ways memoryview.cast() is expected to fail: invalid
+ # or unsupported formats, invalid shapes, wrong argument counts and
+ # types, and size mismatches or overflows.
+
+ # invalid format
+ for sfmt in NON_BYTE_FORMAT:
+ sformat = '@' + sfmt if randrange(2) else sfmt
+ ssize = struct.calcsize(sformat)
+ for dfmt in NON_BYTE_FORMAT:
+ dformat = '@' + dfmt if randrange(2) else dfmt
+ dsize = struct.calcsize(dformat)
+ ex = ndarray(list(range(32)), shape=[32//ssize], format=sformat)
+ msrc = memoryview(ex)
+ self.assertRaises(TypeError, msrc.cast, dfmt, [32//dsize])
+
+ # NOTE(review): `dsize` is not recomputed in the loop below; it still
+ # holds the value left by the last iteration of the loop above, so the
+ # shape argument [32//dsize] is unrelated to the current dfmt. Confirm
+ # whether that is intended.
+ for sfmt, sitems, _ in iter_format(1):
+ ex = ndarray(sitems, shape=[1], format=sfmt)
+ msrc = memoryview(ex)
+ for dfmt, _, _ in iter_format(1):
+ if (not is_memoryview_format(sfmt) or
+ not is_memoryview_format(dfmt)):
+ self.assertRaises(ValueError, msrc.cast, dfmt,
+ [32//dsize])
+ else:
+ if not is_byte_format(sfmt) and not is_byte_format(dfmt):
+ self.assertRaises(TypeError, msrc.cast, dfmt,
+ [32//dsize])
+
+ # invalid shape
+ size_h = struct.calcsize('h')
+ size_d = struct.calcsize('d')
+ ex = ndarray(list(range(2*2*size_d)), shape=[2,2,size_d], format='h')
+ msrc = memoryview(ex)
+ self.assertRaises(TypeError, msrc.cast, shape=[2,2,size_h], format='d')
+
+ ex = ndarray(list(range(120)), shape=[1,2,3,4,5])
+ m = memoryview(ex)
+
+ # incorrect number of args
+ self.assertRaises(TypeError, m.cast)
+ self.assertRaises(TypeError, m.cast, 1, 2, 3)
+
+ # incorrect dest format type
+ self.assertRaises(TypeError, m.cast, {})
+
+ # incorrect dest format
+ self.assertRaises(ValueError, m.cast, "X")
+ self.assertRaises(ValueError, m.cast, "@X")
+ self.assertRaises(ValueError, m.cast, "@XY")
+
+ # dest format not implemented
+ self.assertRaises(ValueError, m.cast, "=B")
+ self.assertRaises(ValueError, m.cast, "!L")
+ self.assertRaises(ValueError, m.cast, "<P")
+ self.assertRaises(ValueError, m.cast, ">l")
+ self.assertRaises(ValueError, m.cast, "BI")
+ self.assertRaises(ValueError, m.cast, "xBI")
+
+ # src format not implemented
+ ex = ndarray([(1,2), (3,4)], shape=[2], format="II")
+ m = memoryview(ex)
+ self.assertRaises(NotImplementedError, m.__getitem__, 0)
+ self.assertRaises(NotImplementedError, m.__setitem__, 0, 8)
+ self.assertRaises(NotImplementedError, m.tolist)
+
+ # incorrect shape type
+ ex = ndarray(list(range(120)), shape=[1,2,3,4,5])
+ m = memoryview(ex)
+ self.assertRaises(TypeError, m.cast, "B", shape={})
+
+ # incorrect shape elements
+ ex = ndarray(list(range(120)), shape=[2*3*4*5])
+ m = memoryview(ex)
+ self.assertRaises(OverflowError, m.cast, "B", shape=[2**64])
+ self.assertRaises(ValueError, m.cast, "B", shape=[-1])
+ self.assertRaises(ValueError, m.cast, "B", shape=[2,3,4,5,6,7,-1])
+ self.assertRaises(ValueError, m.cast, "B", shape=[2,3,4,5,6,7,0])
+ self.assertRaises(TypeError, m.cast, "B", shape=[2,3,4,5,6,7,'x'])
+
+ # N-D -> N-D cast
+ ex = ndarray(list([9 for _ in range(3*5*7*11)]), shape=[3,5,7,11])
+ m = memoryview(ex)
+ self.assertRaises(TypeError, m.cast, "I", shape=[2,3,4,5])
+
+ # cast with ndim > 64
+ nd = ndarray(list(range(128)), shape=[128], format='I')
+ m = memoryview(nd)
+ self.assertRaises(ValueError, m.cast, 'I', [1]*128)
+
+ # view->len not a multiple of itemsize
+ ex = ndarray(list([9 for _ in range(3*5*7*11)]), shape=[3*5*7*11])
+ m = memoryview(ex)
+ self.assertRaises(TypeError, m.cast, "I", shape=[2,3,4,5])
+
+ # product(shape) * itemsize != buffer size
+ ex = ndarray(list([9 for _ in range(3*5*7*11)]), shape=[3*5*7*11])
+ m = memoryview(ex)
+ self.assertRaises(TypeError, m.cast, "B", shape=[2,3,4,5])
+
+ # product(shape) * itemsize overflow
+ nd = ndarray(list(range(128)), shape=[128], format='I')
+ m1 = memoryview(nd)
+ nd = ndarray(list(range(128)), shape=[128], format='B')
+ m2 = memoryview(nd)
+ # The shapes used depend on whether sys.maxsize is 2**63-1.
+ if sys.maxsize == 2**63-1:
+ self.assertRaises(TypeError, m1.cast, 'B',
+ [7, 7, 73, 127, 337, 92737, 649657])
+ self.assertRaises(ValueError, m1.cast, 'B',
+ [2**20, 2**20, 2**10, 2**10, 2**3])
+ self.assertRaises(ValueError, m2.cast, 'I',
+ [2**20, 2**20, 2**10, 2**10, 2**1])
+ else:
+ self.assertRaises(TypeError, m1.cast, 'B',
+ [1, 2147483647])
+ self.assertRaises(ValueError, m1.cast, 'B',
+ [2**10, 2**10, 2**5, 2**5, 2**1])
+ self.assertRaises(ValueError, m2.cast, 'I',
+ [2**10, 2**10, 2**5, 2**3, 2**1])
+
+ def test_memoryview_cast(self):
+ # Casts between ndim 0 and ndim 1, then round-trips 31-item
+ # array.array and ndarray exporters through each byte format and
+ # back to the original format.
+
+ # (byte format, function giving the expected item list for that
+ # format from the exporter's bytes)
+ bytespec = (
+ ('B', lambda ex: list(ex.tobytes())),
+ ('b', lambda ex: [x-256 if x > 127 else x for x in list(ex.tobytes())]),
+ ('c', lambda ex: [bytes(chr(x), 'latin-1') for x in list(ex.tobytes())]),
+ )
+
+ # Cast m to each byte format and back to fmt, verifying both views.
+ # The shapes assume the exporter holds 31 items.
+ def iter_roundtrip(ex, m, items, fmt):
+ srcsize = struct.calcsize(fmt)
+ for bytefmt, to_bytelist in bytespec:
+
+ m2 = m.cast(bytefmt)
+ lst = to_bytelist(ex)
+ self.verify(m2, obj=ex,
+ itemsize=1, fmt=bytefmt, readonly=0,
+ ndim=1, shape=[31*srcsize], strides=(1,),
+ lst=lst, cast=True)
+
+ m3 = m2.cast(fmt)
+ self.assertEqual(m3, ex)
+ lst = ex.tolist()
+ self.verify(m3, obj=ex,
+ itemsize=srcsize, fmt=fmt, readonly=0,
+ ndim=1, shape=[31], strides=(srcsize,),
+ lst=lst, cast=True)
+
+ # cast from ndim = 0 to ndim = 1
+ srcsize = struct.calcsize('I')
+ ex = ndarray(9, shape=[], format='I')
+ destitems, destshape = cast_items(ex, 'B', 1)
+ m = memoryview(ex)
+ m2 = m.cast('B')
+ self.verify(m2, obj=ex,
+ itemsize=1, fmt='B', readonly=1,
+ ndim=1, shape=destshape, strides=(1,),
+ lst=destitems, cast=True)
+
+ # cast from ndim = 1 to ndim = 0
+ destsize = struct.calcsize('I')
+ ex = ndarray([9]*destsize, shape=[destsize], format='B')
+ destitems, destshape = cast_items(ex, 'I', destsize, shape=[])
+ m = memoryview(ex)
+ m2 = m.cast('I', shape=[])
+ self.verify(m2, obj=ex,
+ itemsize=destsize, fmt='I', readonly=1,
+ ndim=0, shape=(), strides=(),
+ lst=destitems, cast=True)
+
+ # array.array: roundtrip to/from bytes
+ for fmt, items, _ in iter_format(31, 'array'):
+ ex = array.array(fmt, items)
+ m = memoryview(ex)
+ iter_roundtrip(ex, m, items, fmt)
+
+ # ndarray: roundtrip to/from bytes
+ for fmt, items, _ in iter_format(31, 'memoryview'):
+ ex = ndarray(items, shape=[31], format=fmt, flags=ND_WRITABLE)
+ m = memoryview(ex)
+ iter_roundtrip(ex, m, items, fmt)
+
+ def test_memoryview_cast_1D_ND(self):
+ # Cast between C-contiguous buffers. At least one buffer must
+ # be 1D, at least one format must be 'c', 'b' or 'B'.
+ for _tshape in gencastshapes():
+ for char in fmtdict['@']:
+ # The target format randomly gets an explicit '@' prefix.
+ tfmt = ('', '@')[randrange(2)] + char
+ tsize = struct.calcsize(tfmt)
+ n = prod(_tshape) * tsize
+ obj = 'memoryview' if is_byte_format(tfmt) else 'bytefmt'
+ for fmt, items, _ in iter_format(n, obj):
+ size = struct.calcsize(fmt)
+ shape = [n] if n > 0 else []
+ tshape = _tshape + [size]
+
+ ex = ndarray(items, shape=shape, format=fmt)
+ m = memoryview(ex)
+
+ titems, tshape = cast_items(ex, tfmt, tsize, shape=tshape)
+
+ # cast_items() returning None for the items is taken to mean
+ # that the cast itself must raise TypeError.
+ if titems is None:
+ self.assertRaises(TypeError, m.cast, tfmt, tshape)
+ continue
+ if titems == 'nan':
+ continue # NaNs in lists are a recipe for trouble.
+
+ # 1D -> ND
+ nd = ndarray(titems, shape=tshape, format=tfmt)
+
+ m2 = m.cast(tfmt, shape=tshape)
+ ndim = len(tshape)
+ strides = nd.strides
+ lst = nd.tolist()
+ self.verify(m2, obj=ex,
+ itemsize=tsize, fmt=tfmt, readonly=1,
+ ndim=ndim, shape=tshape, strides=strides,
+ lst=lst, cast=True)
+
+ # ND -> 1D
+ # Cast back both without and with an explicit shape.
+ m3 = m2.cast(fmt)
+ m4 = m2.cast(fmt, shape=shape)
+ ndim = len(shape)
+ strides = ex.strides
+ lst = ex.tolist()
+
+ self.verify(m3, obj=ex,
+ itemsize=size, fmt=fmt, readonly=1,
+ ndim=ndim, shape=shape, strides=strides,
+ lst=lst, cast=True)
+
+ self.verify(m4, obj=ex,
+ itemsize=size, fmt=fmt, readonly=1,
+ ndim=ndim, shape=shape, strides=strides,
+ lst=lst, cast=True)
+
+ def test_memoryview_tolist(self):
+
+ # Most tolist() tests are in self.verify() etc.
+
+ a = array.array('h', list(range(-6, 6)))
+ m = memoryview(a)
+ self.assertEqual(m, a)
+ self.assertEqual(m.tolist(), a.tolist())
+
+ a = a[2::3]
+ m = m[2::3]
+ self.assertEqual(m, a)
+ self.assertEqual(m.tolist(), a.tolist())
+
+ ex = ndarray(list(range(2*3*5*7*11)), shape=[11,2,7,3,5], format='L')
+ m = memoryview(ex)
+ self.assertEqual(m.tolist(), ex.tolist())
+
+ ex = ndarray([(2, 5), (7, 11)], shape=[2], format='lh')
+ m = memoryview(ex)
+ self.assertRaises(NotImplementedError, m.tolist)
+
+ ex = ndarray([b'12345'], shape=[1], format="s")
+ m = memoryview(ex)
+ self.assertRaises(NotImplementedError, m.tolist)
+
+ ex = ndarray([b"a",b"b",b"c",b"d",b"e",b"f"], shape=[2,3], format='s')
+ m = memoryview(ex)
+ self.assertRaises(NotImplementedError, m.tolist)
+
+ def test_memoryview_repr(self):
+ m = memoryview(bytearray(9))
+ r = m.__repr__()
+ self.assertTrue(r.startswith("<memory"))
+
+ m.release()
+ r = m.__repr__()
+ self.assertTrue(r.startswith("<released"))
+
+ def test_memoryview_sequence(self):
+
+ for fmt in ('d', 'f'):
+ inf = float(3e400)
+ ex = array.array(fmt, [1.0, inf, 3.0])
+ m = memoryview(ex)
+ self.assertIn(1.0, m)
+ self.assertIn(5e700, m)
+ self.assertIn(3.0, m)
+
+ ex = ndarray(9.0, [], format='f')
+ m = memoryview(ex)
+ self.assertRaises(TypeError, eval, "9.0 in m", locals())
+
+ def test_memoryview_index(self):
+
+ # ndim = 0
+ ex = ndarray(12.5, shape=[], format='d')
+ m = memoryview(ex)
+ self.assertEqual(m[()], 12.5)
+ self.assertEqual(m[...], m)
+ self.assertEqual(m[...], ex)
+ self.assertRaises(TypeError, m.__getitem__, 0)
+
+ ex = ndarray((1,2,3), shape=[], format='iii')
+ m = memoryview(ex)
+ self.assertRaises(NotImplementedError, m.__getitem__, ())
+
+ # range
+ ex = ndarray(list(range(7)), shape=[7], flags=ND_WRITABLE)
+ m = memoryview(ex)
+
+ self.assertRaises(IndexError, m.__getitem__, 2**64)
+ self.assertRaises(TypeError, m.__getitem__, 2.0)
+ self.assertRaises(TypeError, m.__getitem__, 0.0)
+
+ # out of bounds
+ self.assertRaises(IndexError, m.__getitem__, -8)
+ self.assertRaises(IndexError, m.__getitem__, 8)
+
+ # Not implemented: multidimensional sub-views
+ ex = ndarray(list(range(12)), shape=[3,4], flags=ND_WRITABLE)
+ m = memoryview(ex)
+
+ self.assertRaises(NotImplementedError, m.__getitem__, 0)
+ self.assertRaises(NotImplementedError, m.__setitem__, 0, 9)
+ self.assertRaises(NotImplementedError, m.__getitem__, 0)
+
+ def test_memoryview_assign(self):
+ # Item assignment through memoryviews: zero-dimensional views,
+ # read-only views, invalid and out-of-range indices, successful
+ # packing per format, and the exceptions raised for bad values.
+
+ # ndim = 0
+ ex = ndarray(12.5, shape=[], format='f', flags=ND_WRITABLE)
+ m = memoryview(ex)
+ m[()] = 22.5
+ self.assertEqual(m[()], 22.5)
+ m[...] = 23.5
+ self.assertEqual(m[()], 23.5)
+ self.assertRaises(TypeError, m.__setitem__, 0, 24.7)
+
+ # read-only
+ ex = ndarray(list(range(7)), shape=[7])
+ m = memoryview(ex)
+ self.assertRaises(TypeError, m.__setitem__, 2, 10)
+
+ # range
+ ex = ndarray(list(range(7)), shape=[7], flags=ND_WRITABLE)
+ m = memoryview(ex)
+
+ self.assertRaises(IndexError, m.__setitem__, 2**64, 9)
+ self.assertRaises(TypeError, m.__setitem__, 2.0, 10)
+ self.assertRaises(TypeError, m.__setitem__, 0.0, 11)
+
+ # out of bounds
+ self.assertRaises(IndexError, m.__setitem__, -8, 20)
+ self.assertRaises(IndexError, m.__setitem__, 8, 25)
+
+ # pack_single() success:
+ for fmt in fmtdict['@']:
+ # 'c' and '?' are handled separately after this loop.
+ if fmt == 'c' or fmt == '?':
+ continue
+ ex = ndarray([1,2,3], shape=[3], format=fmt, flags=ND_WRITABLE)
+ m = memoryview(ex)
+ i = randrange(-3, 3)
+ m[i] = 8
+ self.assertEqual(m[i], 8)
+ self.assertEqual(m[i], ex[i])
+
+ ex = ndarray([b'1', b'2', b'3'], shape=[3], format='c',
+ flags=ND_WRITABLE)
+ m = memoryview(ex)
+ m[2] = b'9'
+ self.assertEqual(m[2], b'9')
+
+ ex = ndarray([True, False, True], shape=[3], format='?',
+ flags=ND_WRITABLE)
+ m = memoryview(ex)
+ m[1] = True
+ self.assertEqual(m[1], True)
+
+ # pack_single() exceptions:
+ nd = ndarray([b'x'], shape=[1], format='c', flags=ND_WRITABLE)
+ m = memoryview(nd)
+ self.assertRaises(TypeError, m.__setitem__, 0, 100)
+
+ ex = ndarray(list(range(120)), shape=[1,2,3,4,5], flags=ND_WRITABLE)
+ m1 = memoryview(ex)
+
+ # Each format is checked with values just outside the (lo, hi) pair
+ # stored for it in fmtdict['@'].
+ for fmt, _range in fmtdict['@'].items():
+ if (fmt == '?'): # PyObject_IsTrue() accepts anything
+ continue
+ if fmt == 'c': # special case tested above
+ continue
+ m2 = m1.cast(fmt)
+ lo, hi = _range
+ # For 'd' and 'f' the bounds are replaced by -2**1024 and 2**1024.
+ if fmt == 'd' or fmt == 'f':
+ lo, hi = -2**1024, 2**1024
+ if fmt != 'P': # PyLong_AsVoidPtr() accepts negative numbers
+ self.assertRaises(ValueError, m2.__setitem__, 0, lo-1)
+ self.assertRaises(TypeError, m2.__setitem__, 0, "xyz")
+ self.assertRaises(ValueError, m2.__setitem__, 0, hi)
+
+ # invalid item
+ m2 = m1.cast('c')
+ self.assertRaises(ValueError, m2.__setitem__, 0, b'\xff\xff')
+
+ # format not implemented
+ ex = ndarray(list(range(1)), shape=[1], format="xL", flags=ND_WRITABLE)
+ m = memoryview(ex)
+ self.assertRaises(NotImplementedError, m.__setitem__, 0, 1)
+
+ ex = ndarray([b'12345'], shape=[1], format="s", flags=ND_WRITABLE)
+ m = memoryview(ex)
+ self.assertRaises(NotImplementedError, m.__setitem__, 0, 1)
+
+ # Not implemented: multidimensional sub-views
+ ex = ndarray(list(range(12)), shape=[3,4], flags=ND_WRITABLE)
+ m = memoryview(ex)
+
+ self.assertRaises(NotImplementedError, m.__setitem__, 0, [2, 3])
+
+ def test_memoryview_slice(self):
+ # Exercise memoryview slicing: invalid slice keys and rvalues must raise,
+ # and slice assignment between strided views must leave every view equal
+ # to the ndarray exporter it was created from.
+
+ ex = ndarray(list(range(12)), shape=[12], flags=ND_WRITABLE)
+ m = memoryview(ex)
+
+ # zero step
+ self.assertRaises(ValueError, m.__getitem__, slice(0,2,0))
+ self.assertRaises(ValueError, m.__setitem__, slice(0,2,0),
+ bytearray([1,2]))
+
+ # invalid slice key
+ self.assertRaises(TypeError, m.__getitem__, ())
+
+ # multidimensional slices
+ # (a tuple of slices is rejected with NotImplementedError for both
+ # reading and writing)
+ ex = ndarray(list(range(12)), shape=[12], flags=ND_WRITABLE)
+ m = memoryview(ex)
+
+ self.assertRaises(NotImplementedError, m.__getitem__,
+ (slice(0,2,1), slice(0,2,1)))
+ self.assertRaises(NotImplementedError, m.__setitem__,
+ (slice(0,2,1), slice(0,2,1)), bytearray([1,2]))
+
+ # invalid slice tuple
+ self.assertRaises(TypeError, m.__getitem__, (slice(0,2,1), {}))
+ self.assertRaises(TypeError, m.__setitem__, (slice(0,2,1), {}),
+ bytearray([1,2]))
+
+ # rvalue is not an exporter
+ self.assertRaises(TypeError, m.__setitem__, slice(0,1,1), [1])
+
+ # non-contiguous slice assignment
+ # m1 and m2 are views of ex1 and ex2; after each pair of assignments the
+ # views are compared with their exporters again.
+ for flags in (0, ND_PIL):
+ ex1 = ndarray(list(range(12)), shape=[12], strides=[-1], offset=11,
+ flags=ND_WRITABLE|flags)
+ ex2 = ndarray(list(range(24)), shape=[12], strides=[2], flags=flags)
+ m1 = memoryview(ex1)
+ m2 = memoryview(ex2)
+
+ ex1[2:5] = ex1[2:5]
+ m1[2:5] = m2[2:5]
+
+ self.assertEqual(m1, ex1)
+ self.assertEqual(m2, ex2)
+
+ ex1[1:3][::-1] = ex2[0:2][::1]
+ m1[1:3][::-1] = m2[0:2][::1]
+
+ self.assertEqual(m1, ex1)
+ self.assertEqual(m2, ex2)
+
+ ex1[4:1:-2][::-1] = ex1[1:4:2][::1]
+ m1[4:1:-2][::-1] = m1[1:4:2][::1]
+
+ self.assertEqual(m1, ex1)
+ self.assertEqual(m2, ex2)
+
+ def test_memoryview_array(self):
+ # Cross-check memoryview against array.array: item reads, item writes and
+ # slice assignment are performed on both and the results compared, for
+ # the formats/items produced by iter_format() and the slices produced by
+ # genslices().
+
+ def cmptest(testcase, a, b, m, singleitem):
+ # a: reference array; b: array exported through m (callers pass
+ # m = memoryview(b)); singleitem: replacement value written to each
+ # index in turn. The original item is written back to a and m
+ # afterwards.
+ for i, _ in enumerate(a):
+ ai = a[i]
+ mi = m[i]
+ testcase.assertEqual(ai, mi)
+ a[i] = singleitem
+ if singleitem != ai:
+ testcase.assertNotEqual(a, m)
+ testcase.assertNotEqual(a, b)
+ else:
+ testcase.assertEqual(a, m)
+ testcase.assertEqual(a, b)
+ m[i] = singleitem
+ testcase.assertEqual(a, m)
+ testcase.assertEqual(b, m)
+ a[i] = ai
+ m[i] = mi
+
+ for n in range(1, 5):
+ for fmt, items, singleitem in iter_format(n, 'array'):
+ for lslice in genslices(n):
+ for rslice in genslices(n):
+
+ a = array.array(fmt, items)
+ b = array.array(fmt, items)
+ m = memoryview(b)
+
+ self.assertEqual(m, a)
+ self.assertEqual(m.tolist(), a.tolist())
+ self.assertEqual(m.tobytes(), a.tobytes())
+ self.assertEqual(len(m), len(a))
+
+ cmptest(self, a, b, m, singleitem)
+
+ # Perform the slice assignment on the plain array first and record
+ # either the exception class or whether the two slices differ in
+ # length.
+ array_err = None
+ have_resize = None
+ try:
+ al = a[lslice]
+ ar = a[rslice]
+ a[lslice] = a[rslice]
+ have_resize = len(al) != len(ar)
+ except Exception as e:
+ array_err = e.__class__
+
+ # Same assignment through the memoryview.
+ m_err = None
+ try:
+ m[lslice] = m[rslice]
+ except Exception as e:
+ m_err = e.__class__
+
+ if have_resize: # memoryview cannot change shape
+ self.assertIs(m_err, ValueError)
+ elif m_err or array_err:
+ self.assertIs(m_err, array_err)
+ else:
+ self.assertEqual(m, a)
+ self.assertEqual(m.tolist(), a.tolist())
+ self.assertEqual(m.tobytes(), a.tobytes())
+ cmptest(self, a, b, m, singleitem)
+
+ def test_memoryview_compare_special_cases(self):
+ # Corner cases of memoryview comparison: ordering operators, released
+ # views, non-exporter operands, NaN items and formats for which the
+ # views are expected to compare unequal.
+
+ a = array.array('L', [1, 2, 3])
+ b = array.array('L', [1, 2, 7])
+
+ # Ordering comparisons are unsupported: the methods return NotImplemented.
+ v = memoryview(a)
+ w = memoryview(b)
+ for attr in ('__lt__', '__le__', '__gt__', '__ge__'):
+ self.assertIs(getattr(v, attr)(w), NotImplemented)
+ self.assertIs(getattr(a, attr)(v), NotImplemented)
+
+ # Released views compare equal to themselves:
+ # (but unequal to the exporter, in either operand order)
+ v = memoryview(a)
+ v.release()
+ self.assertEqual(v, v)
+ self.assertNotEqual(v, a)
+ self.assertNotEqual(a, v)
+
+ # A live view and a released view of the same exporter are unequal.
+ v = memoryview(a)
+ w = memoryview(a)
+ w.release()
+ self.assertNotEqual(v, w)
+ self.assertNotEqual(w, v)
+
+ # Operand does not implement the buffer protocol:
+ v = memoryview(a)
+ self.assertNotEqual(v, [1, 2, 3])
+
+ # NaNs
+ # (a view containing a NaN item is unequal to its own exporter)
+ nd = ndarray([(0, 0)], shape=[1], format='l x d x', flags=ND_WRITABLE)
+ nd[0] = (-1, float('nan'))
+ self.assertNotEqual(memoryview(nd), nd)
+
+ # Depends on issue #15625: the struct module does not understand 'u'.
+ a = array.array('u', 'xyz')
+ v = memoryview(a)
+ self.assertNotEqual(a, v)
+ self.assertNotEqual(v, a)
+
+ # Some ctypes format strings are unknown to the struct module.
+ # Two views of the same object then compare unequal and tolist() raises
+ # NotImplementedError.
+ if ctypes:
+ # format: "T{>l:x:>l:y:}"
+ class BEPoint(ctypes.BigEndianStructure):
+ _fields_ = [("x", ctypes.c_long), ("y", ctypes.c_long)]
+ point = BEPoint(100, 200)
+ a = memoryview(point)
+ b = memoryview(point)
+ self.assertNotEqual(a, b)
+ self.assertNotEqual(a, point)
+ self.assertNotEqual(point, a)
+ self.assertRaises(NotImplementedError, a.tolist)
+
+ def test_memoryview_compare_ndim_zero(self):
+ # Compare memoryviews of zero-dimensional (shape=[]) exporters with each
+ # other and with the exporters, before and after one stored value is
+ # changed. Also covers a one-dimensional ND_PIL exporter and
+ # multi-field struct formats.
+
+ nd1 = ndarray(1729, shape=[], format='@L')
+ nd2 = ndarray(1729, shape=[], format='L', flags=ND_WRITABLE)
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+ self.assertEqual(v, w)
+ self.assertEqual(w, v)
+ self.assertEqual(v, nd2)
+ self.assertEqual(nd2, v)
+ self.assertEqual(w, nd1)
+ self.assertEqual(nd1, w)
+
+ self.assertFalse(v.__ne__(w))
+ self.assertFalse(w.__ne__(v))
+
+ # Writing through w changes nd2 only, so every cross comparison flips.
+ w[()] = 1728
+ self.assertNotEqual(v, w)
+ self.assertNotEqual(w, v)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(nd2, v)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(nd1, w)
+
+ self.assertFalse(v.__eq__(w))
+ self.assertFalse(w.__eq__(v))
+
+ # One-dimensional ND_PIL exporters: equal until one item is modified.
+ nd = ndarray(list(range(12)), shape=[12], flags=ND_WRITABLE|ND_PIL)
+ ex = ndarray(list(range(12)), shape=[12], flags=ND_WRITABLE|ND_PIL)
+ m = memoryview(ex)
+
+ self.assertEqual(m, nd)
+ m[9] = 100
+ self.assertNotEqual(m, nd)
+
+ # struct module: equal
+ # (formats differ in their first field, 'L' vs 'h'; stored values match)
+ nd1 = ndarray((1729, 1.2, b'12345'), shape=[], format='Lf5s')
+ nd2 = ndarray((1729, 1.2, b'12345'), shape=[], format='hf5s',
+ flags=ND_WRITABLE)
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+ self.assertEqual(v, w)
+ self.assertEqual(w, v)
+ self.assertEqual(v, nd2)
+ self.assertEqual(nd2, v)
+ self.assertEqual(w, nd1)
+ self.assertEqual(nd1, w)
+
+ # struct module: not equal
+ # (first field is 1729 in nd1 and -1729 in nd2)
+ nd1 = ndarray((1729, 1.2, b'12345'), shape=[], format='Lf5s')
+ nd2 = ndarray((-1729, 1.2, b'12345'), shape=[], format='hf5s',
+ flags=ND_WRITABLE)
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+ self.assertNotEqual(v, w)
+ self.assertNotEqual(w, v)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(nd2, v)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(nd1, w)
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+
+ def test_memoryview_compare_ndim_one(self):
+ # Compare memoryviews of one-dimensional exporters. The contiguous
+ # sections differ in their last item (-729 vs 729) and must be unequal;
+ # the non-contiguous sections compare a 3-item array with every second
+ # item of a 5-item array and must be equal, also when reversed.
+
+ # contiguous
+ nd1 = ndarray([-529, 576, -625, 676, -729], shape=[5], format='@h')
+ nd2 = ndarray([-529, 576, -625, 676, 729], shape=[5], format='@h')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ # contiguous, struct module
+ nd1 = ndarray([-529, 576, -625, 676, -729], shape=[5], format='<i')
+ nd2 = ndarray([-529, 576, -625, 676, 729], shape=[5], format='>h')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ # non-contiguous
+ nd1 = ndarray([-529, -625, -729], shape=[3], format='@h')
+ nd2 = ndarray([-529, 576, -625, 676, -729], shape=[5], format='@h')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd2[::2])
+ self.assertEqual(w[::2], nd1)
+ self.assertEqual(v, w[::2])
+ self.assertEqual(v[::-1], w[::-2])
+
+ # non-contiguous, struct module
+ nd1 = ndarray([-529, -625, -729], shape=[3], format='!h')
+ nd2 = ndarray([-529, 576, -625, 676, -729], shape=[5], format='<l')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd2[::2])
+ self.assertEqual(w[::2], nd1)
+ self.assertEqual(v, w[::2])
+ self.assertEqual(v[::-1], w[::-2])
+
+ # non-contiguous, suboffsets
+ nd1 = ndarray([-529, -625, -729], shape=[3], format='@h')
+ nd2 = ndarray([-529, 576, -625, 676, -729], shape=[5], format='@h',
+ flags=ND_PIL)
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd2[::2])
+ self.assertEqual(w[::2], nd1)
+ self.assertEqual(v, w[::2])
+ self.assertEqual(v[::-1], w[::-2])
+
+ # non-contiguous, suboffsets, struct module
+ nd1 = ndarray([-529, -625, -729], shape=[3], format='h 0c')
+ nd2 = ndarray([-529, 576, -625, 676, -729], shape=[5], format='> h',
+ flags=ND_PIL)
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd2[::2])
+ self.assertEqual(w[::2], nd1)
+ self.assertEqual(v, w[::2])
+ self.assertEqual(v[::-1], w[::-2])
+
+ def test_memoryview_compare_zero_shape(self):
+ # One-dimensional exporters with shape=[0] must compare equal to each
+ # other even though they were initialised from different item lists and,
+ # in the second section, with different formats.
+
+ # zeros in shape
+ nd1 = ndarray([900, 961], shape=[0], format='@h')
+ nd2 = ndarray([-900, -961], shape=[0], format='@h')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertEqual(v, nd2)
+ self.assertEqual(w, nd1)
+ self.assertEqual(v, w)
+
+ # zeros in shape, struct module
+ nd1 = ndarray([900, 961], shape=[0], format='= h0c')
+ nd2 = ndarray([-900, -961], shape=[0], format='@ i')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertEqual(v, nd2)
+ self.assertEqual(w, nd1)
+ self.assertEqual(v, w)
+
+ def test_memoryview_compare_zero_strides(self):
+ # An exporter built from a single item with shape=[4] and strides=[0]
+ # must compare equal to an exporter that stores the same item four times.
+
+ # zero strides
+ nd1 = ndarray([900, 900, 900, 900], shape=[4], format='@L')
+ nd2 = ndarray([900], shape=[4], strides=[0], format='L')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertEqual(v, nd2)
+ self.assertEqual(w, nd1)
+ self.assertEqual(v, w)
+
+ # zero strides, struct module
+ # (two-field items; the formats of nd1 and nd2 differ)
+ nd1 = ndarray([(900, 900)]*4, shape=[4], format='@ Li')
+ nd2 = ndarray([(900, 900)], shape=[4], strides=[0], format='!L h')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertEqual(v, nd2)
+ self.assertEqual(w, nd1)
+ self.assertEqual(v, w)
+
+ def test_memoryview_compare_random_formats(self):
+ # For items and formats generated by randitems(), a memoryview must
+ # compare equal to its exporter, both for the full array and for the
+ # [::-3] slice, with and without the ND_PIL flag.
+
+ # random single character native formats
+ n = 10
+ for char in fmtdict['@m']:
+ fmt, items, singleitem = randitems(n, 'memoryview', '@', char)
+ for flags in (0, ND_PIL):
+ nd = ndarray(items, shape=[n], format=fmt, flags=flags)
+ m = memoryview(nd)
+ self.assertEqual(m, nd)
+
+ nd = nd[::-3]
+ m = memoryview(nd)
+ self.assertEqual(m, nd)
+
+ # random formats
+ # (100 rounds; randitems() is called with its default arguments here)
+ n = 10
+ for _ in range(100):
+ fmt, items, singleitem = randitems(n)
+ for flags in (0, ND_PIL):
+ nd = ndarray(items, shape=[n], format=fmt, flags=flags)
+ m = memoryview(nd)
+ self.assertEqual(m, nd)
+
+ nd = nd[::-3]
+ m = memoryview(nd)
+ self.assertEqual(m, nd)
+
+ def test_memoryview_compare_multidim_c(self):
+ # Compare memoryviews of multi-dimensional exporters created without
+ # layout flags. Different values or different shapes must compare
+ # unequal; the same values and shape under a different format must
+ # compare equal.
+
+ # C-contiguous, different values
+ nd1 = ndarray(list(range(-15, 15)), shape=[3, 2, 5], format='@h')
+ nd2 = ndarray(list(range(0, 30)), shape=[3, 2, 5], format='@h')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ # C-contiguous, different values, struct module
+ nd1 = ndarray([(0, 1, 2)]*30, shape=[3, 2, 5], format='=f q xxL')
+ nd2 = ndarray([(-1.2, 1, 2)]*30, shape=[3, 2, 5], format='< f 2Q')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ # C-contiguous, different shape
+ nd1 = ndarray(list(range(30)), shape=[2, 3, 5], format='L')
+ nd2 = ndarray(list(range(30)), shape=[3, 2, 5], format='L')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ # C-contiguous, different shape, struct module
+ nd1 = ndarray([(0, 1, 2)]*21, shape=[3, 7], format='! b B xL')
+ nd2 = ndarray([(0, 1, 2)]*21, shape=[7, 3], format='= Qx l xxL')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ # C-contiguous, different format, struct module
+ # ('L' vs 'l' with identical values and shape: expected to be equal)
+ nd1 = ndarray(list(range(30)), shape=[2, 3, 5], format='L')
+ nd2 = ndarray(list(range(30)), shape=[2, 3, 5], format='l')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertEqual(v, nd2)
+ self.assertEqual(w, nd1)
+ self.assertEqual(v, w)
+
+ def test_memoryview_compare_multidim_fortran(self):
+ # Same comparison matrix as the C-contiguous test, for exporters created
+ # with ND_FORTRAN: different values or shapes must compare unequal, and
+ # the same values and shape under a different format must compare equal.
+
+ # Fortran-contiguous, different values
+ nd1 = ndarray(list(range(-15, 15)), shape=[5, 2, 3], format='@h',
+ flags=ND_FORTRAN)
+ nd2 = ndarray(list(range(0, 30)), shape=[5, 2, 3], format='@h',
+ flags=ND_FORTRAN)
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ # Fortran-contiguous, different values, struct module
+ # (the two fields are swapped between nd1 and nd2)
+ nd1 = ndarray([(2**64-1, -1)]*6, shape=[2, 3], format='=Qq',
+ flags=ND_FORTRAN)
+ nd2 = ndarray([(-1, 2**64-1)]*6, shape=[2, 3], format='=qQ',
+ flags=ND_FORTRAN)
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ # Fortran-contiguous, different shape
+ nd1 = ndarray(list(range(-15, 15)), shape=[2, 3, 5], format='l',
+ flags=ND_FORTRAN)
+ nd2 = ndarray(list(range(-15, 15)), shape=[3, 2, 5], format='l',
+ flags=ND_FORTRAN)
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ # Fortran-contiguous, different shape, struct module
+ nd1 = ndarray(list(range(-15, 15)), shape=[2, 3, 5], format='0ll',
+ flags=ND_FORTRAN)
+ nd2 = ndarray(list(range(-15, 15)), shape=[3, 2, 5], format='l',
+ flags=ND_FORTRAN)
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ # Fortran-contiguous, different format, struct module
+ # ('@h' vs '@b' with identical values and shape: expected to be equal)
+ nd1 = ndarray(list(range(30)), shape=[5, 2, 3], format='@h',
+ flags=ND_FORTRAN)
+ nd2 = ndarray(list(range(30)), shape=[5, 2, 3], format='@b',
+ flags=ND_FORTRAN)
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertEqual(v, nd2)
+ self.assertEqual(w, nd1)
+ self.assertEqual(v, w)
+
+ def test_memoryview_compare_multidim_mixed(self):
+     # Compare memoryviews of multi-dimensional exporters that differ in
+     # memory layout (C vs. Fortran order, sliced views), values, shape or
+     # format.  Equality is expected whenever shape and logical values
+     # match, regardless of layout or format.
+
+     # mixed C/Fortran contiguous
+     lst1 = list(range(-15, 15))
+     lst2 = transpose(lst1, [3, 2, 5])
+     nd1 = ndarray(lst1, shape=[3, 2, 5], format='@l')
+     nd2 = ndarray(lst2, shape=[3, 2, 5], format='l', flags=ND_FORTRAN)
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertEqual(v, w)
+
+     # mixed C/Fortran contiguous, struct module
+     lst1 = [(-3.3, -22, b'x')]*30
+     lst1[5] = (-2.2, -22, b'x')
+     lst2 = transpose(lst1, [3, 2, 5])
+     nd1 = ndarray(lst1, shape=[3, 2, 5], format='d b c')
+     nd2 = ndarray(lst2, shape=[3, 2, 5], format='d h c', flags=ND_FORTRAN)
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertEqual(v, w)
+
+     # different values, non-contiguous
+     ex1 = ndarray(list(range(40)), shape=[5, 8], format='@I')
+     nd1 = ex1[3:1:-1, ::-2]
+     ex2 = ndarray(list(range(40)), shape=[5, 8], format='I')
+     nd2 = ex2[1:3:1, ::-2]
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertNotEqual(v, nd2)
+     self.assertNotEqual(w, nd1)
+     self.assertNotEqual(v, w)
+
+     # same values, non-contiguous, struct module
+     ex1 = ndarray([(2**31-1, -2**31)]*22, shape=[11, 2], format='=ii')
+     nd1 = ex1[3:1:-1, ::-2]
+     ex2 = ndarray([(2**31-1, -2**31)]*22, shape=[11, 2], format='>ii')
+     nd2 = ex2[1:3:1, ::-2]
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertEqual(v, nd2)
+     self.assertEqual(w, nd1)
+     self.assertEqual(v, w)
+
+     # different shape
+     # The second exporter is bound to ex2 so that nd2 is a slice of the
+     # array built in this section, not of a leftover from the one above.
+     ex1 = ndarray(list(range(30)), shape=[2, 3, 5], format='b')
+     nd1 = ex1[1:3:, ::-2]
+     ex2 = ndarray(list(range(30)), shape=[3, 2, 5], format='b')
+     nd2 = ex2[1:3:, ::-2]
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertNotEqual(v, nd2)
+     self.assertNotEqual(w, nd1)
+     self.assertNotEqual(v, w)
+
+     # different shape, struct module
+     ex1 = ndarray(list(range(30)), shape=[2, 3, 5], format='B')
+     nd1 = ex1[1:3:, ::-2]
+     ex2 = ndarray(list(range(30)), shape=[3, 2, 5], format='b')
+     nd2 = ex2[1:3:, ::-2]
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertNotEqual(v, nd2)
+     self.assertNotEqual(w, nd1)
+     self.assertNotEqual(v, w)
+
+     # different format, struct module
+     # Both slices now have the same shape and the same unpacked values
+     # ('b3s' vs 'i3s'), so they are expected to compare equal, like the
+     # other "different format" cases in this test suite.
+     ex1 = ndarray([(2, b'123')]*30, shape=[5, 3, 2], format='b3s')
+     nd1 = ex1[1:3:, ::-2]
+     ex2 = ndarray([(2, b'123')]*30, shape=[5, 3, 2], format='i3s')
+     nd2 = ex2[1:3:, ::-2]
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertEqual(v, nd2)
+     self.assertEqual(w, nd1)
+     self.assertEqual(v, w)
+
+ def test_memoryview_compare_multidim_zero_shape(self):
+ # Multi-dimensional exporters that contain a zero in different positions
+ # of their shape ([0, 3, 2] vs [5, 0, 2]) must compare unequal.
+
+ # zeros in shape
+ nd1 = ndarray(list(range(30)), shape=[0, 3, 2], format='i')
+ nd2 = ndarray(list(range(30)), shape=[5, 0, 2], format='@i')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ # zeros in shape, struct module
+ # NOTE(review): this section uses the same 'i' / '@i' formats as the one
+ # above, so it looks like an exact repeat; a multi-character format was
+ # presumably intended -- confirm.
+ nd1 = ndarray(list(range(30)), shape=[0, 3, 2], format='i')
+ nd2 = ndarray(list(range(30)), shape=[5, 0, 2], format='@i')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertNotEqual(v, nd2)
+ self.assertNotEqual(w, nd1)
+ self.assertNotEqual(v, w)
+
+ def test_memoryview_compare_multidim_zero_strides(self):
+ # A multi-dimensional exporter built from a single item with all-zero
+ # strides must compare equal to one that stores the item in every
+ # position.
+
+ # zero strides
+ nd1 = ndarray([900]*80, shape=[4, 5, 4], format='@L')
+ nd2 = ndarray([900], shape=[4, 5, 4], strides=[0, 0, 0], format='L')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertEqual(v, nd2)
+ self.assertEqual(w, nd1)
+ self.assertEqual(v, w)
+ self.assertEqual(v.tolist(), w.tolist())
+
+ # zero strides, struct module
+ nd1 = ndarray([(1, 2)]*10, shape=[2, 5], format='=lQ')
+ nd2 = ndarray([(1, 2)], shape=[2, 5], strides=[0, 0], format='<lQ')
+ v = memoryview(nd1)
+ w = memoryview(nd2)
+
+ self.assertEqual(v, nd1)
+ self.assertEqual(w, nd2)
+ self.assertEqual(v, nd2)
+ self.assertEqual(w, nd1)
+ self.assertEqual(v, w)
+
+ def test_memoryview_compare_multidim_suboffsets(self):
+     # Compare memoryviews of multi-dimensional exporters where at least
+     # one side is created with ND_PIL, against exporters that differ in
+     # values, shape, format or C/Fortran ordering.
+
+     # suboffsets
+     ex1 = ndarray(list(range(40)), shape=[5, 8], format='@I')
+     nd1 = ex1[3:1:-1, ::-2]
+     ex2 = ndarray(list(range(40)), shape=[5, 8], format='I', flags=ND_PIL)
+     nd2 = ex2[1:3:1, ::-2]
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertNotEqual(v, nd2)
+     self.assertNotEqual(w, nd1)
+     self.assertNotEqual(v, w)
+
+     # suboffsets, struct module
+     ex1 = ndarray([(2**64-1, -1)]*40, shape=[5, 8], format='=Qq',
+                   flags=ND_WRITABLE)
+     ex1[2][7] = (1, -2)
+     nd1 = ex1[3:1:-1, ::-2]
+
+     ex2 = ndarray([(2**64-1, -1)]*40, shape=[5, 8], format='>Qq',
+                   flags=ND_PIL|ND_WRITABLE)
+     ex2[2][7] = (1, -2)
+     nd2 = ex2[1:3:1, ::-2]
+
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertEqual(v, nd2)
+     self.assertEqual(w, nd1)
+     self.assertEqual(v, w)
+
+     # suboffsets, different shape
+     # The second exporter is bound to ex2 so that nd2 is a slice of the
+     # array built in this section, not of a leftover from the one above.
+     ex1 = ndarray(list(range(30)), shape=[2, 3, 5], format='b',
+                   flags=ND_PIL)
+     nd1 = ex1[1:3:, ::-2]
+     ex2 = ndarray(list(range(30)), shape=[3, 2, 5], format='b')
+     nd2 = ex2[1:3:, ::-2]
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertNotEqual(v, nd2)
+     self.assertNotEqual(w, nd1)
+     self.assertNotEqual(v, w)
+
+     # suboffsets, different shape, struct module
+     ex1 = ndarray([(2**8-1, -1)]*40, shape=[2, 3, 5], format='Bb',
+                   flags=ND_PIL|ND_WRITABLE)
+     nd1 = ex1[1:2:, ::-2]
+
+     ex2 = ndarray([(2**8-1, -1)]*40, shape=[3, 2, 5], format='Bb')
+     nd2 = ex2[1:2:, ::-2]
+
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertNotEqual(v, nd2)
+     self.assertNotEqual(w, nd1)
+     self.assertNotEqual(v, w)
+
+     # suboffsets, different format
+     ex1 = ndarray(list(range(30)), shape=[5, 3, 2], format='i', flags=ND_PIL)
+     nd1 = ex1[1:3:, ::-2]
+     ex2 = ndarray(list(range(30)), shape=[5, 3, 2], format='@I', flags=ND_PIL)
+     nd2 = ex2[1:3:, ::-2]
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertEqual(v, nd2)
+     self.assertEqual(w, nd1)
+     self.assertEqual(v, w)
+
+     # suboffsets, different format, struct module
+     ex1 = ndarray([(b'hello', b'', 1)]*27, shape=[3, 3, 3], format='5s0sP',
+                   flags=ND_PIL|ND_WRITABLE)
+     ex1[1][2][2] = (b'sushi', b'', 1)
+     nd1 = ex1[1:3:, ::-2]
+
+     ex2 = ndarray([(b'hello', b'', 1)]*27, shape=[3, 3, 3], format='5s0sP',
+                   flags=ND_PIL|ND_WRITABLE)
+     # NOTE(review): this writes to ex1 a second time and leaves ex2
+     # untouched; the assertNotEqual checks below rely on exactly that
+     # difference, so the statement is kept as is.  Both formats are also
+     # identical despite the section title -- confirm the intent.
+     ex1[1][2][2] = (b'sushi', b'', 1)
+     nd2 = ex2[1:3:, ::-2]
+
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertNotEqual(v, nd2)
+     self.assertNotEqual(w, nd1)
+     self.assertNotEqual(v, w)
+
+     # initialize mixed C/Fortran + suboffsets
+     lst1 = list(range(-15, 15))
+     lst2 = transpose(lst1, [3, 2, 5])
+     nd1 = ndarray(lst1, shape=[3, 2, 5], format='@l', flags=ND_PIL)
+     nd2 = ndarray(lst2, shape=[3, 2, 5], format='l', flags=ND_FORTRAN|ND_PIL)
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertEqual(v, w)
+
+     # initialize mixed C/Fortran + suboffsets, struct module
+     lst1 = [(b'sashimi', b'sliced', 20.05)]*30
+     lst1[11] = (b'ramen', b'spicy', 9.45)
+     lst2 = transpose(lst1, [3, 2, 5])
+
+     nd1 = ndarray(lst1, shape=[3, 2, 5], format='< 10p 9p d', flags=ND_PIL)
+     nd2 = ndarray(lst2, shape=[3, 2, 5], format='> 10p 9p d',
+                   flags=ND_FORTRAN|ND_PIL)
+     v = memoryview(nd1)
+     w = memoryview(nd2)
+
+     self.assertEqual(v, nd1)
+     self.assertEqual(w, nd2)
+     self.assertEqual(v, w)
+
+ def test_memoryview_compare_not_equal(self):
+ # For each byte-order prefix, build two five-dimensional exporters with
+ # identical contents (the second one with ND_FORTRAN), overwrite a
+ # single item of the second and check that the views compare unequal.
+
+ # items not equal
+ for byteorder in ['=', '<', '>', '!']:
+ x = ndarray([2**63]*120, shape=[3,5,2,2,2], format=byteorder+'Q')
+ y = ndarray([2**63]*120, shape=[3,5,2,2,2], format=byteorder+'Q',
+ flags=ND_WRITABLE|ND_FORTRAN)
+ y[2][3][1][1][1] = 1
+ a = memoryview(x)
+ b = memoryview(y)
+ self.assertEqual(a, x)
+ self.assertEqual(b, y)
+ self.assertNotEqual(a, b)
+ self.assertNotEqual(a, y)
+ self.assertNotEqual(b, x)
+
+ # Same check with three-field items.
+ x = ndarray([(2**63, 2**31, 2**15)]*120, shape=[3,5,2,2,2],
+ format=byteorder+'QLH')
+ y = ndarray([(2**63, 2**31, 2**15)]*120, shape=[3,5,2,2,2],
+ format=byteorder+'QLH', flags=ND_WRITABLE|ND_FORTRAN)
+ y[2][3][1][1][1] = (1, 1, 1)
+ a = memoryview(x)
+ b = memoryview(y)
+ self.assertEqual(a, x)
+ self.assertEqual(b, y)
+ self.assertNotEqual(a, b)
+ self.assertNotEqual(a, y)
+ self.assertNotEqual(b, x)
+
+ def test_memoryview_check_released(self):
+ # After release(), every operation and attribute access on the view must
+ # raise ValueError; comparisons involving a released view do not raise.
+
+ a = array.array('d', [1.1, 2.2, 3.3])
+
+ m = memoryview(a)
+ m.release()
+
+ # PyMemoryView_FromObject()
+ self.assertRaises(ValueError, memoryview, m)
+ # memoryview.cast()
+ self.assertRaises(ValueError, m.cast, 'c')
+ # getbuffer()
+ self.assertRaises(ValueError, ndarray, m)
+ # memoryview.tolist()
+ self.assertRaises(ValueError, m.tolist)
+ # memoryview.tobytes()
+ self.assertRaises(ValueError, m.tobytes)
+ # sequence
+ # (eval is used so that the `in` expression can be passed to
+ # assertRaises as a callable plus arguments)
+ self.assertRaises(ValueError, eval, "1.0 in m", locals())
+ # subscript
+ self.assertRaises(ValueError, m.__getitem__, 0)
+ # assignment
+ self.assertRaises(ValueError, m.__setitem__, 0, 1)
+
+ for attr in ('obj', 'nbytes', 'readonly', 'itemsize', 'format', 'ndim',
+ 'shape', 'strides', 'suboffsets', 'c_contiguous',
+ 'f_contiguous', 'contiguous'):
+ self.assertRaises(ValueError, m.__getattribute__, attr)
+
+ # richcompare
+ # (a released view is unequal to other objects but equal to itself)
+ b = array.array('d', [1.1, 2.2, 3.3])
+ m1 = memoryview(a)
+ m2 = memoryview(b)
+
+ self.assertEqual(m1, m2)
+ m1.release()
+ self.assertNotEqual(m1, m2)
+ self.assertNotEqual(m1, a)
+ self.assertEqual(m1, m1)
+
+ def test_memoryview_tobytes(self):
+ # Many implicit tests are already in self.verify().
+ # Here: memoryview.tobytes() must match the exporter's tobytes() for
+ # one- to five-dimensional arrays with single- and multi-field formats.
+
+ t = (-529, 576, -625, 676, -729)
+
+ nd = ndarray(t, shape=[5], format='@h')
+ m = memoryview(nd)
+ self.assertEqual(m, nd)
+ self.assertEqual(m.tobytes(), nd.tobytes())
+
+ # From here on the whole tuple t is a single five-field item.
+ nd = ndarray([t], shape=[1], format='>hQiLl')
+ m = memoryview(nd)
+ self.assertEqual(m, nd)
+ self.assertEqual(m.tobytes(), nd.tobytes())
+
+ nd = ndarray([t for _ in range(12)], shape=[2,2,3], format='=hQiLl')
+ m = memoryview(nd)
+ self.assertEqual(m, nd)
+ self.assertEqual(m.tobytes(), nd.tobytes())
+
+ nd = ndarray([t for _ in range(120)], shape=[5,2,2,3,2],
+ format='<hQiLl')
+ m = memoryview(nd)
+ self.assertEqual(m, nd)
+ self.assertEqual(m.tobytes(), nd.tobytes())
+
+ # Unknown formats are handled: tobytes() purely depends on itemsize.
+ if ctypes:
+ # format: "T{>l:x:>l:y:}"
+ class BEPoint(ctypes.BigEndianStructure):
+ _fields_ = [("x", ctypes.c_long), ("y", ctypes.c_long)]
+ point = BEPoint(100, 200)
+ a = memoryview(point)
+ self.assertEqual(a.tobytes(), bytes(point))
+
+ def test_memoryview_get_contiguous(self):
+ # Many implicit tests are already in self.verify().
+ # Here: get_contiguous(obj, PyBUF_READ|PyBUF_WRITE, order) is checked for
+ # error cases, scalars, zero-length shapes and one- and multi-dimensional
+ # exporters, for the orders 'C', 'F' and 'A'.
+
+ # no buffer interface
+ self.assertRaises(TypeError, get_contiguous, {}, PyBUF_READ, 'F')
+
+ # writable request to read-only object
+ self.assertRaises(BufferError, get_contiguous, b'x', PyBUF_WRITE, 'C')
+
+ # writable request to non-contiguous object
+ nd = ndarray([1, 2, 3], shape=[2], strides=[2])
+ self.assertRaises(BufferError, get_contiguous, nd, PyBUF_WRITE, 'A')
+
+ # scalar, read-only request from read-only exporter
+ nd = ndarray(9, shape=(), format="L")
+ for order in ['C', 'F', 'A']:
+ m = get_contiguous(nd, PyBUF_READ, order)
+ self.assertEqual(m, nd)
+ self.assertEqual(m[()], 9)
+
+ # scalar, read-only request from writable exporter
+ nd = ndarray(9, shape=(), format="L", flags=ND_WRITABLE)
+ for order in ['C', 'F', 'A']:
+ m = get_contiguous(nd, PyBUF_READ, order)
+ self.assertEqual(m, nd)
+ self.assertEqual(m[()], 9)
+
+ # scalar, writable request
+ # (a write through the returned view must be visible in nd)
+ for order in ['C', 'F', 'A']:
+ nd[()] = 9
+ m = get_contiguous(nd, PyBUF_WRITE, order)
+ self.assertEqual(m, nd)
+ self.assertEqual(m[()], 9)
+
+ m[()] = 10
+ self.assertEqual(m[()], 10)
+ self.assertEqual(nd[()], 10)
+
+ # zeros in shape
+ nd = ndarray([1], shape=[0], format="L", flags=ND_WRITABLE)
+ for order in ['C', 'F', 'A']:
+ m = get_contiguous(nd, PyBUF_READ, order)
+ self.assertRaises(IndexError, m.__getitem__, 0)
+ self.assertEqual(m, nd)
+ self.assertEqual(m.tolist(), [])
+
+ nd = ndarray(list(range(8)), shape=[2, 0, 7], format="L",
+ flags=ND_WRITABLE)
+ for order in ['C', 'F', 'A']:
+ m = get_contiguous(nd, PyBUF_READ, order)
+ self.assertEqual(ndarray(m).tolist(), [[], []])
+
+ # one-dimensional
+ nd = ndarray([1], shape=[1], format="h", flags=ND_WRITABLE)
+ for order in ['C', 'F', 'A']:
+ m = get_contiguous(nd, PyBUF_WRITE, order)
+ self.assertEqual(m, nd)
+ self.assertEqual(m.tolist(), nd.tolist())
+
+ nd = ndarray([1, 2, 3], shape=[3], format="b", flags=ND_WRITABLE)
+ for order in ['C', 'F', 'A']:
+ m = get_contiguous(nd, PyBUF_WRITE, order)
+ self.assertEqual(m, nd)
+ self.assertEqual(m.tolist(), nd.tolist())
+
+ # one-dimensional, non-contiguous
+ # (the view returned for a read request rejects item assignment with
+ # TypeError, and the exporter's item stays unchanged)
+ nd = ndarray([1, 2, 3], shape=[2], strides=[2], flags=ND_WRITABLE)
+ for order in ['C', 'F', 'A']:
+ m = get_contiguous(nd, PyBUF_READ, order)
+ self.assertEqual(m, nd)
+ self.assertEqual(m.tolist(), nd.tolist())
+ self.assertRaises(TypeError, m.__setitem__, 1, 20)
+ self.assertEqual(m[1], 3)
+ self.assertEqual(nd[1], 3)
+
+ nd = nd[::-1]
+ for order in ['C', 'F', 'A']:
+ m = get_contiguous(nd, PyBUF_READ, order)
+ self.assertEqual(m, nd)
+ self.assertEqual(m.tolist(), nd.tolist())
+ self.assertRaises(TypeError, m.__setitem__, 1, 20)
+ self.assertEqual(m[1], 1)
+ self.assertEqual(nd[1], 1)
+
+ # multi-dimensional, contiguous input
+ nd = ndarray(list(range(12)), shape=[3, 4], flags=ND_WRITABLE)
+ for order in ['C', 'A']:
+ m = get_contiguous(nd, PyBUF_WRITE, order)
+ self.assertEqual(ndarray(m).tolist(), nd.tolist())
+
+ # NOTE(review): indentation is lost in this text, so it is unclear whether
+ # the next three statements sit inside the loop above. If they follow
+ # it, `order` is the loop's last value ('A') and 'F' was presumably
+ # intended for the read request -- confirm.
+ self.assertRaises(BufferError, get_contiguous, nd, PyBUF_WRITE, 'F')
+ m = get_contiguous(nd, PyBUF_READ, order)
+ self.assertEqual(ndarray(m).tolist(), nd.tolist())
+
+ nd = ndarray(list(range(12)), shape=[3, 4],
+ flags=ND_WRITABLE|ND_FORTRAN)
+ for order in ['F', 'A']:
+ m = get_contiguous(nd, PyBUF_WRITE, order)
+ self.assertEqual(ndarray(m).tolist(), nd.tolist())
+
+ # NOTE(review): same question as above; 'C' was presumably intended for
+ # the read request -- confirm.
+ self.assertRaises(BufferError, get_contiguous, nd, PyBUF_WRITE, 'C')
+ m = get_contiguous(nd, PyBUF_READ, order)
+ self.assertEqual(ndarray(m).tolist(), nd.tolist())
+
+ # multi-dimensional, non-contiguous input
+ nd = ndarray(list(range(12)), shape=[3, 4], flags=ND_WRITABLE|ND_PIL)
+ for order in ['C', 'F', 'A']:
+ self.assertRaises(BufferError, get_contiguous, nd, PyBUF_WRITE,
+ order)
+ m = get_contiguous(nd, PyBUF_READ, order)
+ self.assertEqual(ndarray(m).tolist(), nd.tolist())
+
+ # flags
+ nd = ndarray([1,2,3,4,5], shape=[3], strides=[2])
+ m = get_contiguous(nd, PyBUF_READ, 'C')
+ self.assertTrue(m.c_contiguous)
+
+ def test_memoryview_serializing(self):
+ # Feed a memoryview to io.BytesIO and read the stream back into a
+ # bytearray of the same total size; the result must equal m.tobytes().
+
+ # C-contiguous
+ size = struct.calcsize('i')
+ a = array.array('i', [1,2,3,4,5])
+ m = memoryview(a)
+ buf = io.BytesIO(m)
+ b = bytearray(5*size)
+ buf.readinto(b)
+ self.assertEqual(m.tobytes(), b)
+
+ # C-contiguous, multi-dimensional
+ size = struct.calcsize('L')
+ nd = ndarray(list(range(12)), shape=[2,3,2], format="L")
+ m = memoryview(nd)
+ buf = io.BytesIO(m)
+ b = bytearray(2*3*2*size)
+ buf.readinto(b)
+ self.assertEqual(m.tobytes(), b)
+
+ # Fortran contiguous, multi-dimensional
+ # (this case is disabled: the statements below are commented out)
+ #size = struct.calcsize('L')
+ #nd = ndarray(list(range(12)), shape=[2,3,2], format="L",
+ # flags=ND_FORTRAN)
+ #m = memoryview(nd)
+ #buf = io.BytesIO(m)
+ #b = bytearray(2*3*2*size)
+ #buf.readinto(b)
+ #self.assertEqual(m.tobytes(), b)
+
+ def test_memoryview_hash(self):
+ # hash(memoryview) must agree with the hash of the corresponding bytes
+ # object, or of the ndarray exporter, for contiguous, sliced, Fortran,
+ # ND_PIL and non-byte-format buffers.
+
+ # bytes exporter
+ b = bytes(list(range(12)))
+ m = memoryview(b)
+ self.assertEqual(hash(b), hash(m))
+
+ # C-contiguous
+ # (casting to a 3x4 shape does not change the hash)
+ mc = m.cast('c', shape=[3,4])
+ self.assertEqual(hash(mc), hash(b))
+
+ # non-contiguous
+ mx = m[::-2]
+ b = bytes(list(range(12))[::-2])
+ self.assertEqual(hash(mx), hash(b))
+
+ # Fortran contiguous
+ nd = ndarray(list(range(30)), shape=[3,2,5], flags=ND_FORTRAN)
+ m = memoryview(nd)
+ self.assertEqual(hash(m), hash(nd))
+
+ # multi-dimensional slice
+ nd = ndarray(list(range(30)), shape=[3,2,5])
+ x = nd[::2, ::, ::-1]
+ m = memoryview(x)
+ self.assertEqual(hash(m), hash(x))
+
+ # multi-dimensional slice with suboffsets
+ nd = ndarray(list(range(30)), shape=[2,5,3], flags=ND_PIL)
+ x = nd[::2, ::, ::-1]
+ m = memoryview(x)
+ self.assertEqual(hash(m), hash(x))
+
+ # non-byte formats
+ # (compared against the hash of the exporter's tobytes() result)
+ nd = ndarray(list(range(12)), shape=[2,2,3], format='L')
+ m = memoryview(nd)
+ self.assertEqual(hash(m), hash(nd.tobytes()))
+
+ nd = ndarray(list(range(-6, 6)), shape=[2,2,3], format='h')
+ m = memoryview(nd)
+ self.assertEqual(hash(m), hash(nd.tobytes()))
+
+ def test_memoryview_release(self):
+
+ # Create re-exporter from getbuffer(memoryview), then release the view.
+ a = bytearray([1,2,3])
+ m = memoryview(a)
+ nd = ndarray(m) # re-exporter
+ self.assertRaises(BufferError, m.release)
+ del nd
+ m.release()
+
+ a = bytearray([1,2,3])
+ m = memoryview(a)
+ nd1 = ndarray(m, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ nd2 = ndarray(nd1, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ self.assertIs(nd2.obj, m)
+ self.assertRaises(BufferError, m.release)
+ del nd1, nd2
+ m.release()
+
+ # chained views
+ a = bytearray([1,2,3])
+ m1 = memoryview(a)
+ m2 = memoryview(m1)
+ nd = ndarray(m2) # re-exporter
+ m1.release()
+ self.assertRaises(BufferError, m2.release)
+ del nd
+ m2.release()
+
+ a = bytearray([1,2,3])
+ m1 = memoryview(a)
+ m2 = memoryview(m1)
+ nd1 = ndarray(m2, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ nd2 = ndarray(nd1, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ self.assertIs(nd2.obj, m2)
+ m1.release()
+ self.assertRaises(BufferError, m2.release)
+ del nd1, nd2
+ m2.release()
+
+ # Allow changing layout while buffers are exported.
+ nd = ndarray([1,2,3], shape=[3], flags=ND_VAREXPORT)
+ m1 = memoryview(nd)
+
+ nd.push([4,5,6,7,8], shape=[5]) # mutate nd
+ m2 = memoryview(nd)
+
+ x = memoryview(m1)
+ self.assertEqual(x.tolist(), m1.tolist())
+
+ y = memoryview(m2)
+ self.assertEqual(y.tolist(), m2.tolist())
+ self.assertEqual(y.tolist(), nd.tolist())
+ m2.release()
+ y.release()
+
+ nd.pop() # pop the current view
+ self.assertEqual(x.tolist(), nd.tolist())
+
+ del nd
+ m1.release()
+ x.release()
+
+ # If multiple memoryviews share the same managed buffer, implicit
+ # release() in the context manager's __exit__() method should still
+ # work.
+ def catch22(b):
+ with memoryview(b) as m2:
+ pass
+
+ x = bytearray(b'123')
+ with memoryview(x) as m1:
+ catch22(m1)
+ self.assertEqual(m1[0], ord(b'1'))
+
+ x = ndarray(list(range(12)), shape=[2,2,3], format='l')
+ y = ndarray(x, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ z = ndarray(y, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ self.assertIs(z.obj, x)
+ with memoryview(z) as m:
+ catch22(m)
+ self.assertEqual(m[0:1].tolist(), [[[0, 1, 2], [3, 4, 5]]])
+
+ # Test garbage collection.
+ for flags in (0, ND_REDIRECT):
+ x = bytearray(b'123')
+ with memoryview(x) as m1:
+ del x
+ y = ndarray(m1, getbuf=PyBUF_FULL_RO, flags=flags)
+ with memoryview(y) as m2:
+ del y
+ z = ndarray(m2, getbuf=PyBUF_FULL_RO, flags=flags)
+ with memoryview(z) as m3:
+ del z
+ catch22(m3)
+ catch22(m2)
+ catch22(m1)
+ self.assertEqual(m1[0], ord(b'1'))
+ self.assertEqual(m2[1], ord(b'2'))
+ self.assertEqual(m3[2], ord(b'3'))
+ del m3
+ del m2
+ del m1
+
+ x = bytearray(b'123')
+ with memoryview(x) as m1:
+ del x
+ y = ndarray(m1, getbuf=PyBUF_FULL_RO, flags=flags)
+ with memoryview(y) as m2:
+ del y
+ z = ndarray(m2, getbuf=PyBUF_FULL_RO, flags=flags)
+ with memoryview(z) as m3:
+ del z
+ catch22(m1)
+ catch22(m2)
+ catch22(m3)
+ self.assertEqual(m1[0], ord(b'1'))
+ self.assertEqual(m2[1], ord(b'2'))
+ self.assertEqual(m3[2], ord(b'3'))
+ del m1, m2, m3
+
+ # memoryview.release() fails if the view has exported buffers.
+ x = bytearray(b'123')
+ with self.assertRaises(BufferError):
+ with memoryview(x) as m:
+ ex = ndarray(m)
+ m[0] == ord(b'1')
+
+ def test_memoryview_redirect(self):
+
+ nd = ndarray([1.0 * x for x in range(12)], shape=[12], format='d')
+ a = array.array('d', [1.0 * x for x in range(12)])
+
+ for x in (nd, a):
+ y = ndarray(x, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ z = ndarray(y, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ m = memoryview(z)
+
+ self.assertIs(y.obj, x)
+ self.assertIs(z.obj, x)
+ self.assertIs(m.obj, x)
+
+ self.assertEqual(m, x)
+ self.assertEqual(m, y)
+ self.assertEqual(m, z)
+
+ self.assertEqual(m[1:3], x[1:3])
+ self.assertEqual(m[1:3], y[1:3])
+ self.assertEqual(m[1:3], z[1:3])
+ del y, z
+ self.assertEqual(m[1:3], x[1:3])
+
+ def test_memoryview_from_static_exporter(self):
+
+ fmt = 'B'
+ lst = [0,1,2,3,4,5,6,7,8,9,10,11]
+
+ # exceptions
+ self.assertRaises(TypeError, staticarray, 1, 2, 3)
+
+ # view.obj==x
+ x = staticarray()
+ y = memoryview(x)
+ self.verify(y, obj=x,
+ itemsize=1, fmt=fmt, readonly=1,
+ ndim=1, shape=[12], strides=[1],
+ lst=lst)
+ for i in range(12):
+ self.assertEqual(y[i], i)
+ del x
+ del y
+
+ x = staticarray()
+ y = memoryview(x)
+ del y
+ del x
+
+ x = staticarray()
+ y = ndarray(x, getbuf=PyBUF_FULL_RO)
+ z = ndarray(y, getbuf=PyBUF_FULL_RO)
+ m = memoryview(z)
+ self.assertIs(y.obj, x)
+ self.assertIs(m.obj, z)
+ self.verify(m, obj=z,
+ itemsize=1, fmt=fmt, readonly=1,
+ ndim=1, shape=[12], strides=[1],
+ lst=lst)
+ del x, y, z, m
+
+ x = staticarray()
+ y = ndarray(x, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ z = ndarray(y, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ m = memoryview(z)
+ self.assertIs(y.obj, x)
+ self.assertIs(z.obj, x)
+ self.assertIs(m.obj, x)
+ self.verify(m, obj=x,
+ itemsize=1, fmt=fmt, readonly=1,
+ ndim=1, shape=[12], strides=[1],
+ lst=lst)
+ del x, y, z, m
+
+ # view.obj==NULL
+ x = staticarray(legacy_mode=True)
+ y = memoryview(x)
+ self.verify(y, obj=None,
+ itemsize=1, fmt=fmt, readonly=1,
+ ndim=1, shape=[12], strides=[1],
+ lst=lst)
+ for i in range(12):
+ self.assertEqual(y[i], i)
+ del x
+ del y
+
+ x = staticarray(legacy_mode=True)
+ y = memoryview(x)
+ del y
+ del x
+
+ x = staticarray(legacy_mode=True)
+ y = ndarray(x, getbuf=PyBUF_FULL_RO)
+ z = ndarray(y, getbuf=PyBUF_FULL_RO)
+ m = memoryview(z)
+ self.assertIs(y.obj, None)
+ self.assertIs(m.obj, z)
+ self.verify(m, obj=z,
+ itemsize=1, fmt=fmt, readonly=1,
+ ndim=1, shape=[12], strides=[1],
+ lst=lst)
+ del x, y, z, m
+
+ x = staticarray(legacy_mode=True)
+ y = ndarray(x, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ z = ndarray(y, getbuf=PyBUF_FULL_RO, flags=ND_REDIRECT)
+ m = memoryview(z)
+ # Clearly setting view.obj==NULL is inferior, since it
+ # messes up the redirection chain:
+ self.assertIs(y.obj, None)
+ self.assertIs(z.obj, y)
+ self.assertIs(m.obj, y)
+ self.verify(m, obj=y,
+ itemsize=1, fmt=fmt, readonly=1,
+ ndim=1, shape=[12], strides=[1],
+ lst=lst)
+ del x, y, z, m
+
+ def test_memoryview_getbuffer_undefined(self):
+
+ # getbufferproc does not adhere to the new documentation
+ nd = ndarray([1,2,3], [3], flags=ND_GETBUF_FAIL|ND_GETBUF_UNDEFINED)
+ self.assertRaises(BufferError, memoryview, nd)
+
+ def test_issue_7385(self):
+ x = ndarray([1,2,3], shape=[3], flags=ND_GETBUF_FAIL)
+ self.assertRaises(BufferError, memoryview, x)
+
+
+def test_main():
+ support.run_unittest(TestBufferProtocol)
+
+
+if __name__ == "__main__":
+ test_main()
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 55fb63a..c32992c 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -1,19 +1,21 @@
# Python test set -- built-in functions
-import platform
-import unittest
-import sys
-import warnings
+import ast
+import builtins
import collections
import io
+import locale
import os
-import ast
-import types
-import builtins
+import pickle
+import platform
import random
+import sys
import traceback
-from test.support import fcmp, TESTFN, unlink, run_unittest, check_warnings
+import types
+import unittest
+import warnings
from operator import neg
+from test.support import TESTFN, unlink, run_unittest, check_warnings
try:
import pty, signal
except ImportError:
@@ -110,7 +112,30 @@ class TestFailingIter:
def __iter__(self):
raise RuntimeError
+def filter_char(arg):
+ return ord(arg) > ord("d")
+
+def map_char(arg):
+ return chr(ord(arg)+1)
+
class BuiltinTest(unittest.TestCase):
+ # Helper to check picklability
+ def check_iter_pickle(self, it, seq):
+ itorg = it
+ d = pickle.dumps(it)
+ it = pickle.loads(d)
+ self.assertEqual(type(itorg), type(it))
+ self.assertEqual(list(it), seq)
+
+ #test the iterator after dropping one from it
+ it = pickle.loads(d)
+ try:
+ next(it)
+ except StopIteration:
+ return
+ d = pickle.dumps(it)
+ it = pickle.loads(d)
+ self.assertEqual(list(it), seq[1:])
def test_import(self):
__import__('sys')
@@ -255,8 +280,7 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(chr(0xff), '\xff')
self.assertRaises(ValueError, chr, 1<<24)
self.assertEqual(chr(sys.maxunicode),
- str(('\\U%08x' % (sys.maxunicode)).encode("ascii"),
- 'unicode-escape'))
+ str('\\U0010ffff'.encode("ascii"), 'unicode-escape'))
self.assertRaises(TypeError, chr)
self.assertEqual(chr(0x0000FFFF), "\U0000FFFF")
self.assertEqual(chr(0x00010000), "\U00010000")
@@ -378,7 +402,15 @@ class BuiltinTest(unittest.TestCase):
f = Foo()
self.assertTrue(dir(f) == ["ga", "kan", "roo"])
- # dir(obj__dir__not_list)
+ # dir(obj__dir__tuple)
+ class Foo(object):
+ def __dir__(self):
+ return ("b", "c", "a")
+ res = dir(Foo())
+ self.assertIsInstance(res, list)
+ self.assertTrue(res == ["a", "b", "c"])
+
+ # dir(obj__dir__not_sequence)
class Foo(object):
def __dir__(self):
return 7
@@ -391,6 +423,8 @@ class BuiltinTest(unittest.TestCase):
except:
self.assertEqual(len(dir(sys.exc_info()[2])), 4)
+ # test that object has a __dir__()
+ self.assertEqual(sorted([].__dir__()), dir([]))
def test_divmod(self):
self.assertEqual(divmod(12, 7), (1, 5))
@@ -400,10 +434,13 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(divmod(-sys.maxsize-1, -1), (sys.maxsize+1, 0))
- self.assertTrue(not fcmp(divmod(3.25, 1.0), (3.0, 0.25)))
- self.assertTrue(not fcmp(divmod(-3.25, 1.0), (-4.0, 0.75)))
- self.assertTrue(not fcmp(divmod(3.25, -1.0), (-4.0, -0.75)))
- self.assertTrue(not fcmp(divmod(-3.25, -1.0), (3.0, -0.25)))
+ for num, denom, exp_result in [ (3.25, 1.0, (3.0, 0.25)),
+ (-3.25, 1.0, (-4.0, 0.75)),
+ (3.25, -1.0, (-4.0, -0.75)),
+ (-3.25, -1.0, (3.0, -0.25))]:
+ result = divmod(num, denom)
+ self.assertAlmostEqual(result[0], exp_result[0])
+ self.assertAlmostEqual(result[1], exp_result[1])
self.assertRaises(TypeError, divmod)
@@ -518,6 +555,39 @@ class BuiltinTest(unittest.TestCase):
del l['__builtins__']
self.assertEqual((g, l), ({'a': 1}, {'b': 2}))
+ def test_exec_globals(self):
+ code = compile("print('Hello World!')", "", "exec")
+ # no builtin function
+ self.assertRaisesRegex(NameError, "name 'print' is not defined",
+ exec, code, {'__builtins__': {}})
+ # __builtins__ must be a mapping type
+ self.assertRaises(TypeError,
+ exec, code, {'__builtins__': 123})
+
+ # no __build_class__ function
+ code = compile("class A: pass", "", "exec")
+ self.assertRaisesRegex(NameError, "__build_class__ not found",
+ exec, code, {'__builtins__': {}})
+
+ class frozendict_error(Exception):
+ pass
+
+ class frozendict(dict):
+ def __setitem__(self, key, value):
+ raise frozendict_error("frozendict is readonly")
+
+ # read-only builtins
+ frozen_builtins = frozendict(__builtins__)
+ code = compile("__builtins__['superglobal']=2; print(superglobal)", "test", "exec")
+ self.assertRaises(frozendict_error,
+ exec, code, {'__builtins__': frozen_builtins})
+
+ # read-only globals
+ namespace = frozendict({})
+ code = compile("x=1", "test", "exec")
+ self.assertRaises(frozendict_error,
+ exec, code, namespace)
+
def test_exec_redirected(self):
savestdout = sys.stdout
sys.stdout = None # Whatever that cannot flush()
@@ -554,6 +624,11 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(list(filter(lambda x: x>=3, (1, 2, 3, 4))), [3, 4])
self.assertRaises(TypeError, list, filter(42, (1, 2)))
+ def test_filter_pickle(self):
+ f1 = filter(filter_char, "abcdeabcde")
+ f2 = filter(filter_char, "abcdeabcde")
+ self.check_iter_pickle(f1, list(f2))
+
def test_getattr(self):
self.assertTrue(getattr(sys, 'stdout') is sys.stdout)
self.assertRaises(TypeError, getattr, sys, 1)
@@ -747,6 +822,11 @@ class BuiltinTest(unittest.TestCase):
raise RuntimeError
self.assertRaises(RuntimeError, list, map(badfunc, range(5)))
+ def test_map_pickle(self):
+ m1 = map(map_char, "Is this the real life?")
+ m2 = map(map_char, "Is this the real life?")
+ self.check_iter_pickle(m1, list(m2))
+
def test_max(self):
self.assertEqual(max('123123'), '3')
self.assertEqual(max(1, 2, 3), 3)
@@ -880,7 +960,29 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(fp.read(1000), 'YYY'*100)
finally:
fp.close()
- unlink(TESTFN)
+ unlink(TESTFN)
+
+ def test_open_default_encoding(self):
+ old_environ = dict(os.environ)
+ try:
+ # try to get a user preferred encoding different than the current
+ # locale encoding to check that open() uses the current locale
+ # encoding and not the user preferred encoding
+ for key in ('LC_ALL', 'LANG', 'LC_CTYPE'):
+ if key in os.environ:
+ del os.environ[key]
+
+ self.write_testfile()
+ current_locale_encoding = locale.getpreferredencoding(False)
+ fp = open(TESTFN, 'w')
+ try:
+ self.assertEqual(fp.encoding, current_locale_encoding)
+ finally:
+ fp.close()
+ unlink(TESTFN)
+ finally:
+ os.environ.clear()
+ os.environ.update(old_environ)
def test_ord(self):
self.assertEqual(ord(' '), 32)
@@ -1197,6 +1299,9 @@ class BuiltinTest(unittest.TestCase):
self.assertRaises(TypeError, sum, 42)
self.assertRaises(TypeError, sum, ['a', 'b', 'c'])
self.assertRaises(TypeError, sum, ['a', 'b', 'c'], '')
+ self.assertRaises(TypeError, sum, [b'a', b'c'], b'')
+ values = [bytearray(b'a'), bytearray(b'b')]
+ self.assertRaises(TypeError, sum, values, bytearray(b''))
self.assertRaises(TypeError, sum, [[1], [2], [3]])
self.assertRaises(TypeError, sum, [{2:3}])
self.assertRaises(TypeError, sum, [{2:3}]*2, {2:3})
@@ -1285,6 +1390,13 @@ class BuiltinTest(unittest.TestCase):
return i
self.assertRaises(ValueError, list, zip(BadSeq(), BadSeq()))
+ def test_zip_pickle(self):
+ a = (1, 2, 3)
+ b = (4, 5, 6)
+ t = [(1, 4), (2, 5), (3, 6)]
+ z1 = zip(a, b)
+ self.check_iter_pickle(z1, t)
+
def test_format(self):
# Test the basic machinery of the format() builtin. Don't test
# the specifics of the various formatters
@@ -1358,14 +1470,14 @@ class BuiltinTest(unittest.TestCase):
# --------------------------------------------------------------------
# Issue #7994: object.__format__ with a non-empty format string is
- # pending deprecated
+ # deprecated
def test_deprecated_format_string(obj, fmt_str, should_raise_warning):
with warnings.catch_warnings(record=True) as w:
- warnings.simplefilter("always", PendingDeprecationWarning)
+ warnings.simplefilter("always", DeprecationWarning)
format(obj, fmt_str)
if should_raise_warning:
self.assertEqual(len(w), 1)
- self.assertIsInstance(w[0].message, PendingDeprecationWarning)
+ self.assertIsInstance(w[0].message, DeprecationWarning)
self.assertIn('object.__format__ with a non-empty format '
'string', str(w[0].message))
else:
@@ -1409,6 +1521,13 @@ class BuiltinTest(unittest.TestCase):
self.assertRaises(ValueError, x.translate, b"1", 1)
self.assertRaises(TypeError, x.translate, b"1"*256, 1)
+ def test_construct_singletons(self):
+ for const in None, Ellipsis, NotImplemented:
+ tp = type(const)
+ self.assertIs(tp(), const)
+ self.assertRaises(TypeError, tp, 1, 2)
+ self.assertRaises(TypeError, tp, a=1, b=2)
+
class TestSorted(unittest.TestCase):
def test_basic(self):
diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py
index 5eab8f5..8ce6c22 100644
--- a/Lib/test/test_bytes.py
+++ b/Lib/test/test_bytes.py
@@ -188,24 +188,26 @@ class BaseBytesTest(unittest.TestCase):
def test_encoding(self):
sample = "Hello world\n\u1234\u5678\u9abc"
- for enc in ("utf8", "utf16"):
+ for enc in ("utf-8", "utf-16"):
b = self.type2test(sample, enc)
self.assertEqual(b, self.type2test(sample.encode(enc)))
- self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin1")
- b = self.type2test(sample, "latin1", "ignore")
+ self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin-1")
+ b = self.type2test(sample, "latin-1", "ignore")
self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))
def test_decode(self):
sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"
- for enc in ("utf8", "utf16"):
+ for enc in ("utf-8", "utf-16"):
b = self.type2test(sample, enc)
self.assertEqual(b.decode(enc), sample)
sample = "Hello world\n\x80\x81\xfe\xff"
- b = self.type2test(sample, "latin1")
- self.assertRaises(UnicodeDecodeError, b.decode, "utf8")
- self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
- self.assertEqual(b.decode(errors="ignore", encoding="utf8"),
+ b = self.type2test(sample, "latin-1")
+ self.assertRaises(UnicodeDecodeError, b.decode, "utf-8")
+ self.assertEqual(b.decode("utf-8", "ignore"), "Hello world\n")
+ self.assertEqual(b.decode(errors="ignore", encoding="utf-8"),
"Hello world\n")
+ # Default encoding is utf-8
+ self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603')
def test_from_int(self):
b = self.type2test(0)
@@ -291,10 +293,27 @@ class BaseBytesTest(unittest.TestCase):
def test_count(self):
b = self.type2test(b'mississippi')
+ i = 105
+ p = 112
+ w = 119
+
self.assertEqual(b.count(b'i'), 4)
self.assertEqual(b.count(b'ss'), 2)
self.assertEqual(b.count(b'w'), 0)
+ self.assertEqual(b.count(i), 4)
+ self.assertEqual(b.count(w), 0)
+
+ self.assertEqual(b.count(b'i', 6), 2)
+ self.assertEqual(b.count(b'p', 6), 2)
+ self.assertEqual(b.count(b'i', 1, 3), 1)
+ self.assertEqual(b.count(b'p', 7, 9), 1)
+
+ self.assertEqual(b.count(i, 6), 2)
+ self.assertEqual(b.count(p, 6), 2)
+ self.assertEqual(b.count(i, 1, 3), 1)
+ self.assertEqual(b.count(p, 7, 9), 1)
+
def test_startswith(self):
b = self.type2test(b'hello')
self.assertFalse(self.type2test().startswith(b"anything"))
@@ -325,35 +344,86 @@ class BaseBytesTest(unittest.TestCase):
def test_find(self):
b = self.type2test(b'mississippi')
+ i = 105
+ w = 119
+
self.assertEqual(b.find(b'ss'), 2)
+ self.assertEqual(b.find(b'w'), -1)
+ self.assertEqual(b.find(b'mississippian'), -1)
+
+ self.assertEqual(b.find(i), 1)
+ self.assertEqual(b.find(w), -1)
+
self.assertEqual(b.find(b'ss', 3), 5)
self.assertEqual(b.find(b'ss', 1, 7), 2)
self.assertEqual(b.find(b'ss', 1, 3), -1)
- self.assertEqual(b.find(b'w'), -1)
- self.assertEqual(b.find(b'mississippian'), -1)
+
+ self.assertEqual(b.find(i, 6), 7)
+ self.assertEqual(b.find(i, 1, 3), 1)
+ self.assertEqual(b.find(w, 1, 3), -1)
+
+ for index in (-1, 256, sys.maxsize + 1):
+ self.assertRaisesRegex(
+ ValueError, r'byte must be in range\(0, 256\)',
+ b.find, index)
def test_rfind(self):
b = self.type2test(b'mississippi')
+ i = 105
+ w = 119
+
self.assertEqual(b.rfind(b'ss'), 5)
- self.assertEqual(b.rfind(b'ss', 3), 5)
- self.assertEqual(b.rfind(b'ss', 0, 6), 2)
self.assertEqual(b.rfind(b'w'), -1)
self.assertEqual(b.rfind(b'mississippian'), -1)
+ self.assertEqual(b.rfind(i), 10)
+ self.assertEqual(b.rfind(w), -1)
+
+ self.assertEqual(b.rfind(b'ss', 3), 5)
+ self.assertEqual(b.rfind(b'ss', 0, 6), 2)
+
+ self.assertEqual(b.rfind(i, 1, 3), 1)
+ self.assertEqual(b.rfind(i, 3, 9), 7)
+ self.assertEqual(b.rfind(w, 1, 3), -1)
+
def test_index(self):
- b = self.type2test(b'world')
- self.assertEqual(b.index(b'w'), 0)
- self.assertEqual(b.index(b'orl'), 1)
- self.assertRaises(ValueError, b.index, b'worm')
- self.assertRaises(ValueError, b.index, b'ldo')
+ b = self.type2test(b'mississippi')
+ i = 105
+ w = 119
+
+ self.assertEqual(b.index(b'ss'), 2)
+ self.assertRaises(ValueError, b.index, b'w')
+ self.assertRaises(ValueError, b.index, b'mississippian')
+
+ self.assertEqual(b.index(i), 1)
+ self.assertRaises(ValueError, b.index, w)
+
+ self.assertEqual(b.index(b'ss', 3), 5)
+ self.assertEqual(b.index(b'ss', 1, 7), 2)
+ self.assertRaises(ValueError, b.index, b'ss', 1, 3)
+
+ self.assertEqual(b.index(i, 6), 7)
+ self.assertEqual(b.index(i, 1, 3), 1)
+ self.assertRaises(ValueError, b.index, w, 1, 3)
def test_rindex(self):
- # XXX could be more rigorous
- b = self.type2test(b'world')
- self.assertEqual(b.rindex(b'w'), 0)
- self.assertEqual(b.rindex(b'orl'), 1)
- self.assertRaises(ValueError, b.rindex, b'worm')
- self.assertRaises(ValueError, b.rindex, b'ldo')
+ b = self.type2test(b'mississippi')
+ i = 105
+ w = 119
+
+ self.assertEqual(b.rindex(b'ss'), 5)
+ self.assertRaises(ValueError, b.rindex, b'w')
+ self.assertRaises(ValueError, b.rindex, b'mississippian')
+
+ self.assertEqual(b.rindex(i), 10)
+ self.assertRaises(ValueError, b.rindex, w)
+
+ self.assertEqual(b.rindex(b'ss', 3), 5)
+ self.assertEqual(b.rindex(b'ss', 0, 6), 2)
+
+ self.assertEqual(b.rindex(i, 1, 3), 1)
+ self.assertEqual(b.rindex(i, 3, 9), 7)
+ self.assertRaises(ValueError, b.rindex, w, 1, 3)
def test_replace(self):
b = self.type2test(b'mississippi')
@@ -365,6 +435,14 @@ class BaseBytesTest(unittest.TestCase):
self.assertEqual(b.split(b'i'), [b'm', b'ss', b'ss', b'pp', b''])
self.assertEqual(b.split(b'ss'), [b'mi', b'i', b'ippi'])
self.assertEqual(b.split(b'w'), [b])
+ # with keyword args
+ b = self.type2test(b'a|b|c|d')
+ self.assertEqual(b.split(sep=b'|'), [b'a', b'b', b'c', b'd'])
+ self.assertEqual(b.split(b'|', maxsplit=1), [b'a', b'b|c|d'])
+ self.assertEqual(b.split(sep=b'|', maxsplit=1), [b'a', b'b|c|d'])
+ self.assertEqual(b.split(maxsplit=1, sep=b'|'), [b'a', b'b|c|d'])
+ b = self.type2test(b'a b c d')
+ self.assertEqual(b.split(maxsplit=1), [b'a', b'b c d'])
def test_split_whitespace(self):
for b in (b' arf barf ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf',
@@ -393,6 +471,14 @@ class BaseBytesTest(unittest.TestCase):
self.assertEqual(b.rsplit(b'i'), [b'm', b'ss', b'ss', b'pp', b''])
self.assertEqual(b.rsplit(b'ss'), [b'mi', b'i', b'ippi'])
self.assertEqual(b.rsplit(b'w'), [b])
+ # with keyword args
+ b = self.type2test(b'a|b|c|d')
+ self.assertEqual(b.rsplit(sep=b'|'), [b'a', b'b', b'c', b'd'])
+ self.assertEqual(b.rsplit(b'|', maxsplit=1), [b'a|b|c', b'd'])
+ self.assertEqual(b.rsplit(sep=b'|', maxsplit=1), [b'a|b|c', b'd'])
+ self.assertEqual(b.rsplit(maxsplit=1, sep=b'|'), [b'a|b|c', b'd'])
+ b = self.type2test(b'a b c d')
+ self.assertEqual(b.rsplit(maxsplit=1), [b'a b c', b'd'])
def test_rsplit_whitespace(self):
for b in (b' arf barf ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf',
@@ -432,6 +518,24 @@ class BaseBytesTest(unittest.TestCase):
q = pickle.loads(ps)
self.assertEqual(b, q)
+ def test_iterator_pickling(self):
+ for b in b"", b"a", b"abc", b"\xffab\x80", b"\0\0\377\0\0":
+ it = itorg = iter(self.type2test(b))
+ data = list(self.type2test(b))
+ d = pickle.dumps(it)
+ it = pickle.loads(d)
+ self.assertEqual(type(itorg), type(it))
+ self.assertEqual(list(it), data)
+
+ it = pickle.loads(d)
+ try:
+ next(it)
+ except StopIteration:
+ continue
+ d = pickle.dumps(it)
+ it = pickle.loads(d)
+ self.assertEqual(list(it), data[1:])
+
def test_strip(self):
b = self.type2test(b'mississippi')
self.assertEqual(b.strip(b'i'), b'mississipp')
@@ -473,6 +577,27 @@ class BaseBytesTest(unittest.TestCase):
self.assertRaises(TypeError, self.type2test(b'abc').lstrip, 'b')
self.assertRaises(TypeError, self.type2test(b'abc').rstrip, 'b')
+ def test_center(self):
+ # Fill character can be either bytes or bytearray (issue 12380)
+ b = self.type2test(b'abc')
+ for fill_type in (bytes, bytearray):
+ self.assertEqual(b.center(7, fill_type(b'-')),
+ self.type2test(b'--abc--'))
+
+ def test_ljust(self):
+ # Fill character can be either bytes or bytearray (issue 12380)
+ b = self.type2test(b'abc')
+ for fill_type in (bytes, bytearray):
+ self.assertEqual(b.ljust(7, fill_type(b'-')),
+ self.type2test(b'abc----'))
+
+ def test_rjust(self):
+ # Fill character can be either bytes or bytearray (issue 12380)
+ b = self.type2test(b'abc')
+ for fill_type in (bytes, bytearray):
+ self.assertEqual(b.rjust(7, fill_type(b'-')),
+ self.type2test(b'----abc'))
+
def test_ord(self):
b = self.type2test(b'\0A\x7f\x80\xff')
self.assertEqual([ord(b[i:i+1]) for i in range(len(b))],
@@ -529,6 +654,14 @@ class BaseBytesTest(unittest.TestCase):
self.assertEqual(True, b.startswith(h, None, -2))
self.assertEqual(False, b.startswith(x, None, None))
+ def test_integer_arguments_out_of_byte_range(self):
+ b = self.type2test(b'hello')
+
+ for method in (b.count, b.find, b.index, b.rfind, b.rindex):
+ self.assertRaises(ValueError, method, -1)
+ self.assertRaises(ValueError, method, 256)
+ self.assertRaises(ValueError, method, 9999)
+
def test_find_etc_raise_correct_error_messages(self):
# issue 11828
b = self.type2test(b'hello')
@@ -634,6 +767,39 @@ class ByteArrayTest(BaseBytesTest):
b.reverse()
self.assertFalse(b)
+ def test_clear(self):
+ b = bytearray(b'python')
+ b.clear()
+ self.assertEqual(b, b'')
+
+ b = bytearray(b'')
+ b.clear()
+ self.assertEqual(b, b'')
+
+ b = bytearray(b'')
+ b.append(ord('r'))
+ b.clear()
+ b.append(ord('p'))
+ self.assertEqual(b, b'p')
+
+ def test_copy(self):
+ b = bytearray(b'abc')
+ bb = b.copy()
+ self.assertEqual(bb, b'abc')
+
+ b = bytearray(b'')
+ bb = b.copy()
+ self.assertEqual(bb, b'')
+
+ # test that it's indeed a copy and not a reference
+ b = bytearray(b'abc')
+ bb = b.copy()
+ self.assertEqual(b, bb)
+ self.assertIsNot(b, bb)
+ bb.append(ord('d'))
+ self.assertEqual(bb, b'abcd')
+ self.assertEqual(b, b'abc')
+
def test_regexps(self):
def by(s):
return bytearray(map(ord, s))
@@ -1105,9 +1271,11 @@ class FixedStringTest(test.string_tests.BaseTest):
class ByteArrayAsStringTest(FixedStringTest):
type2test = bytearray
+ contains_bytes = True
class BytesAsStringTest(FixedStringTest):
type2test = bytes
+ contains_bytes = True
class SubclassTest(unittest.TestCase):
diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py
index be35580..257b144 100644
--- a/Lib/test/test_bz2.py
+++ b/Lib/test/test_bz2.py
@@ -1,10 +1,11 @@
#!/usr/bin/env python3
from test import support
-from test.support import TESTFN
+from test.support import TESTFN, bigmemtest, _4G
import unittest
from io import BytesIO
import os
+import random
import subprocess
import sys
@@ -21,9 +22,38 @@ has_cmdline_bunzip2 = sys.platform not in ("win32", "os2emx")
class BaseTest(unittest.TestCase):
"Base for other testcases."
- TEXT = b'root:x:0:0:root:/root:/bin/bash\nbin:x:1:1:bin:/bin:\ndaemon:x:2:2:daemon:/sbin:\nadm:x:3:4:adm:/var/adm:\nlp:x:4:7:lp:/var/spool/lpd:\nsync:x:5:0:sync:/sbin:/bin/sync\nshutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\nhalt:x:7:0:halt:/sbin:/sbin/halt\nmail:x:8:12:mail:/var/spool/mail:\nnews:x:9:13:news:/var/spool/news:\nuucp:x:10:14:uucp:/var/spool/uucp:\noperator:x:11:0:operator:/root:\ngames:x:12:100:games:/usr/games:\ngopher:x:13:30:gopher:/usr/lib/gopher-data:\nftp:x:14:50:FTP User:/var/ftp:/bin/bash\nnobody:x:65534:65534:Nobody:/home:\npostfix:x:100:101:postfix:/var/spool/postfix:\nniemeyer:x:500:500::/home/niemeyer:/bin/bash\npostgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\nmysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\nwww:x:103:104::/var/www:/bin/false\n'
+ TEXT_LINES = [
+ b'root:x:0:0:root:/root:/bin/bash\n',
+ b'bin:x:1:1:bin:/bin:\n',
+ b'daemon:x:2:2:daemon:/sbin:\n',
+ b'adm:x:3:4:adm:/var/adm:\n',
+ b'lp:x:4:7:lp:/var/spool/lpd:\n',
+ b'sync:x:5:0:sync:/sbin:/bin/sync\n',
+ b'shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown\n',
+ b'halt:x:7:0:halt:/sbin:/sbin/halt\n',
+ b'mail:x:8:12:mail:/var/spool/mail:\n',
+ b'news:x:9:13:news:/var/spool/news:\n',
+ b'uucp:x:10:14:uucp:/var/spool/uucp:\n',
+ b'operator:x:11:0:operator:/root:\n',
+ b'games:x:12:100:games:/usr/games:\n',
+ b'gopher:x:13:30:gopher:/usr/lib/gopher-data:\n',
+ b'ftp:x:14:50:FTP User:/var/ftp:/bin/bash\n',
+ b'nobody:x:65534:65534:Nobody:/home:\n',
+ b'postfix:x:100:101:postfix:/var/spool/postfix:\n',
+ b'niemeyer:x:500:500::/home/niemeyer:/bin/bash\n',
+ b'postgres:x:101:102:PostgreSQL Server:/var/lib/pgsql:/bin/bash\n',
+ b'mysql:x:102:103:MySQL server:/var/lib/mysql:/bin/bash\n',
+ b'www:x:103:104::/var/www:/bin/false\n',
+ ]
+ TEXT = b''.join(TEXT_LINES)
DATA = b'BZh91AY&SY.\xc8N\x18\x00\x01>_\x80\x00\x10@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe00\x01\x99\xaa\x00\xc0\x03F\x86\x8c#&\x83F\x9a\x03\x06\xa6\xd0\xa6\x93M\x0fQ\xa7\xa8\x06\x804hh\x12$\x11\xa4i4\xf14S\xd2<Q\xb5\x0fH\xd3\xd4\xdd\xd5\x87\xbb\xf8\x94\r\x8f\xafI\x12\xe1\xc9\xf8/E\x00pu\x89\x12]\xc9\xbbDL\nQ\x0e\t1\x12\xdf\xa0\xc0\x97\xac2O9\x89\x13\x94\x0e\x1c7\x0ed\x95I\x0c\xaaJ\xa4\x18L\x10\x05#\x9c\xaf\xba\xbc/\x97\x8a#C\xc8\xe1\x8cW\xf9\xe2\xd0\xd6M\xa7\x8bXa<e\x84t\xcbL\xb3\xa7\xd9\xcd\xd1\xcb\x84.\xaf\xb3\xab\xab\xad`n}\xa0lh\tE,\x8eZ\x15\x17VH>\x88\xe5\xcd9gd6\x0b\n\xe9\x9b\xd5\x8a\x99\xf7\x08.K\x8ev\xfb\xf7xw\xbb\xdf\xa1\x92\xf1\xdd|/";\xa2\xba\x9f\xd5\xb1#A\xb6\xf6\xb3o\xc9\xc5y\\\xebO\xe7\x85\x9a\xbc\xb6f8\x952\xd5\xd7"%\x89>V,\xf7\xa6z\xe2\x9f\xa3\xdf\x11\x11"\xd6E)I\xa9\x13^\xca\xf3r\xd0\x03U\x922\xf26\xec\xb6\xed\x8b\xc3U\x13\x9d\xc5\x170\xa4\xfa^\x92\xacDF\x8a\x97\xd6\x19\xfe\xdd\xb8\xbd\x1a\x9a\x19\xa3\x80ankR\x8b\xe5\xd83]\xa9\xc6\x08\x82f\xf6\xb9"6l$\xb8j@\xc0\x8a\xb0l1..\xbak\x83ls\x15\xbc\xf4\xc1\x13\xbe\xf8E\xb8\x9d\r\xa8\x9dk\x84\xd3n\xfa\xacQ\x07\xb1%y\xaav\xb4\x08\xe0z\x1b\x16\xf5\x04\xe9\xcc\xb9\x08z\x1en7.G\xfc]\xc9\x14\xe1B@\xbb!8`'
- DATA_CRLF = b'BZh91AY&SY\xaez\xbbN\x00\x01H\xdf\x80\x00\x12@\x02\xff\xf0\x01\x07n\x00?\xe7\xff\xe0@\x01\xbc\xc6`\x86*\x8d=M\xa9\x9a\x86\xd0L@\x0fI\xa6!\xa1\x13\xc8\x88jdi\x8d@\x03@\x1a\x1a\x0c\x0c\x83 \x00\xc4h2\x19\x01\x82D\x84e\t\xe8\x99\x89\x19\x1ah\x00\r\x1a\x11\xaf\x9b\x0fG\xf5(\x1b\x1f?\t\x12\xcf\xb5\xfc\x95E\x00ps\x89\x12^\xa4\xdd\xa2&\x05(\x87\x04\x98\x89u\xe40%\xb6\x19\'\x8c\xc4\x89\xca\x07\x0e\x1b!\x91UIFU%C\x994!DI\xd2\xfa\xf0\xf1N8W\xde\x13A\xf5\x9cr%?\x9f3;I45A\xd1\x8bT\xb1<l\xba\xcb_\xc00xY\x17r\x17\x88\x08\x08@\xa0\ry@\x10\x04$)`\xf2\xce\x89z\xb0s\xec\x9b.iW\x9d\x81\xb5-+t\x9f\x1a\'\x97dB\xf5x\xb5\xbe.[.\xd7\x0e\x81\xe7\x08\x1cN`\x88\x10\xca\x87\xc3!"\x80\x92R\xa1/\xd1\xc0\xe6mf\xac\xbd\x99\xcca\xb3\x8780>\xa4\xc7\x8d\x1a\\"\xad\xa1\xabyBg\x15\xb9l\x88\x88\x91k"\x94\xa4\xd4\x89\xae*\xa6\x0b\x10\x0c\xd6\xd4m\xe86\xec\xb5j\x8a\x86j\';\xca.\x01I\xf2\xaaJ\xe8\x88\x8cU+t3\xfb\x0c\n\xa33\x13r2\r\x16\xe0\xb3(\xbf\x1d\x83r\xe7M\xf0D\x1365\xd8\x88\xd3\xa4\x92\xcb2\x06\x04\\\xc1\xb0\xea//\xbek&\xd8\xe6+t\xe5\xa1\x13\xada\x16\xder5"w]\xa2i\xb7[\x97R \xe2IT\xcd;Z\x04dk4\xad\x8a\t\xd3\x81z\x10\xf1:^`\xab\x1f\xc5\xdc\x91N\x14$+\x9e\xae\xd3\x80'
+
+ def setUp(self):
+ self.filename = TESTFN
+
+ def tearDown(self):
+ if os.path.isfile(self.filename):
+ os.unlink(self.filename)
if has_cmdline_bunzip2:
def decompress(self, data):
@@ -47,90 +77,149 @@ class BaseTest(unittest.TestCase):
class BZ2FileTest(BaseTest):
"Test BZ2File type miscellaneous methods."
- def setUp(self):
- self.filename = TESTFN
-
- def tearDown(self):
- if os.path.isfile(self.filename):
- os.unlink(self.filename)
-
- def createTempFile(self, crlf=0):
+ def createTempFile(self, streams=1):
with open(self.filename, "wb") as f:
- if crlf:
- data = self.DATA_CRLF
- else:
- data = self.DATA
- f.write(data)
+ f.write(self.DATA * streams)
+
+ def testBadArgs(self):
+ with self.assertRaises(TypeError):
+ BZ2File(123.456)
+ with self.assertRaises(ValueError):
+ BZ2File("/dev/null", "z")
+ with self.assertRaises(ValueError):
+ BZ2File("/dev/null", "rx")
+ with self.assertRaises(ValueError):
+ BZ2File("/dev/null", "rbt")
+ with self.assertRaises(ValueError):
+ BZ2File("/dev/null", compresslevel=0)
+ with self.assertRaises(ValueError):
+ BZ2File("/dev/null", compresslevel=10)
def testRead(self):
- # "Test BZ2File.read()"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
self.assertRaises(TypeError, bz2f.read, None)
self.assertEqual(bz2f.read(), self.TEXT)
+ def testReadMultiStream(self):
+ self.createTempFile(streams=5)
+ with BZ2File(self.filename) as bz2f:
+ self.assertRaises(TypeError, bz2f.read, None)
+ self.assertEqual(bz2f.read(), self.TEXT * 5)
+
+ def testReadMonkeyMultiStream(self):
+ # Test BZ2File.read() on a multi-stream archive where a stream
+ # boundary coincides with the end of the raw read buffer.
+ buffer_size = bz2._BUFFER_SIZE
+ bz2._BUFFER_SIZE = len(self.DATA)
+ try:
+ self.createTempFile(streams=5)
+ with BZ2File(self.filename) as bz2f:
+ self.assertRaises(TypeError, bz2f.read, None)
+ self.assertEqual(bz2f.read(), self.TEXT * 5)
+ finally:
+ bz2._BUFFER_SIZE = buffer_size
+
def testRead0(self):
- # Test BBZ2File.read(0)"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
self.assertRaises(TypeError, bz2f.read, None)
self.assertEqual(bz2f.read(0), b"")
def testReadChunk10(self):
- # "Test BZ2File.read() in chunks of 10 bytes"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
text = b''
- while 1:
+ while True:
str = bz2f.read(10)
if not str:
break
text += str
self.assertEqual(text, self.TEXT)
+ def testReadChunk10MultiStream(self):
+ self.createTempFile(streams=5)
+ with BZ2File(self.filename) as bz2f:
+ text = b''
+ while True:
+ str = bz2f.read(10)
+ if not str:
+ break
+ text += str
+ self.assertEqual(text, self.TEXT * 5)
+
def testRead100(self):
- # "Test BZ2File.read(100)"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
self.assertEqual(bz2f.read(100), self.TEXT[:100])
+ def testPeek(self):
+ self.createTempFile()
+ with BZ2File(self.filename) as bz2f:
+ pdata = bz2f.peek()
+ self.assertNotEqual(len(pdata), 0)
+ self.assertTrue(self.TEXT.startswith(pdata))
+ self.assertEqual(bz2f.read(), self.TEXT)
+
+ def testReadInto(self):
+ self.createTempFile()
+ with BZ2File(self.filename) as bz2f:
+ n = 128
+ b = bytearray(n)
+ self.assertEqual(bz2f.readinto(b), n)
+ self.assertEqual(b, self.TEXT[:n])
+ n = len(self.TEXT) - n
+ b = bytearray(len(self.TEXT))
+ self.assertEqual(bz2f.readinto(b), n)
+ self.assertEqual(b[:n], self.TEXT[-n:])
+
def testReadLine(self):
- # "Test BZ2File.readline()"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
self.assertRaises(TypeError, bz2f.readline, None)
- sio = BytesIO(self.TEXT)
- for line in sio.readlines():
+ for line in self.TEXT_LINES:
+ self.assertEqual(bz2f.readline(), line)
+
+ def testReadLineMultiStream(self):
+ self.createTempFile(streams=5)
+ with BZ2File(self.filename) as bz2f:
+ self.assertRaises(TypeError, bz2f.readline, None)
+ for line in self.TEXT_LINES * 5:
self.assertEqual(bz2f.readline(), line)
def testReadLines(self):
- # "Test BZ2File.readlines()"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
self.assertRaises(TypeError, bz2f.readlines, None)
- sio = BytesIO(self.TEXT)
- self.assertEqual(bz2f.readlines(), sio.readlines())
+ self.assertEqual(bz2f.readlines(), self.TEXT_LINES)
+
+ def testReadLinesMultiStream(self):
+ self.createTempFile(streams=5)
+ with BZ2File(self.filename) as bz2f:
+ self.assertRaises(TypeError, bz2f.readlines, None)
+ self.assertEqual(bz2f.readlines(), self.TEXT_LINES * 5)
def testIterator(self):
- # "Test iter(BZ2File)"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
- sio = BytesIO(self.TEXT)
- self.assertEqual(list(iter(bz2f)), sio.readlines())
+ self.assertEqual(list(iter(bz2f)), self.TEXT_LINES)
+
+ def testIteratorMultiStream(self):
+ self.createTempFile(streams=5)
+ with BZ2File(self.filename) as bz2f:
+ self.assertEqual(list(iter(bz2f)), self.TEXT_LINES * 5)
def testClosedIteratorDeadlock(self):
- # "Test that iteration on a closed bz2file releases the lock."
- # http://bugs.python.org/issue3309
+ # Issue #3309: Iteration on a closed BZ2File should release the lock.
self.createTempFile()
bz2f = BZ2File(self.filename)
bz2f.close()
self.assertRaises(ValueError, bz2f.__next__)
- # This call will deadlock of the above .__next__ call failed to
+ # This call will deadlock if the above .__next__ call failed to
# release the lock.
self.assertRaises(ValueError, bz2f.readlines)
def testWrite(self):
- # "Test BZ2File.write()"
with BZ2File(self.filename, "w") as bz2f:
self.assertRaises(TypeError, bz2f.write)
bz2f.write(self.TEXT)
@@ -138,10 +227,9 @@ class BZ2FileTest(BaseTest):
self.assertEqual(self.decompress(f.read()), self.TEXT)
def testWriteChunks10(self):
- # "Test BZ2File.write() with chunks of 10 bytes"
with BZ2File(self.filename, "w") as bz2f:
n = 0
- while 1:
+ while True:
str = self.TEXT[n*10:(n+1)*10]
if not str:
break
@@ -150,13 +238,19 @@ class BZ2FileTest(BaseTest):
with open(self.filename, 'rb') as f:
self.assertEqual(self.decompress(f.read()), self.TEXT)
+ def testWriteNonDefaultCompressLevel(self):
+ expected = bz2.compress(self.TEXT, compresslevel=5)
+ with BZ2File(self.filename, "w", compresslevel=5) as bz2f:
+ bz2f.write(self.TEXT)
+ with open(self.filename, "rb") as f:
+ self.assertEqual(f.read(), expected)
+
def testWriteLines(self):
- # "Test BZ2File.writelines()"
with BZ2File(self.filename, "w") as bz2f:
self.assertRaises(TypeError, bz2f.writelines)
- sio = BytesIO(self.TEXT)
- bz2f.writelines(sio.readlines())
- # patch #1535500
+ bz2f.writelines(self.TEXT_LINES)
+ # Issue #1535500: Calling writelines() on a closed BZ2File
+ # should raise an exception.
self.assertRaises(ValueError, bz2f.writelines, ["a"])
with open(self.filename, 'rb') as f:
self.assertEqual(self.decompress(f.read()), self.TEXT)
@@ -169,39 +263,73 @@ class BZ2FileTest(BaseTest):
self.assertRaises(IOError, bz2f.write, b"a")
self.assertRaises(IOError, bz2f.writelines, [b"a"])
+ def testAppend(self):
+ with BZ2File(self.filename, "w") as bz2f:
+ self.assertRaises(TypeError, bz2f.write)
+ bz2f.write(self.TEXT)
+ with BZ2File(self.filename, "a") as bz2f:
+ self.assertRaises(TypeError, bz2f.write)
+ bz2f.write(self.TEXT)
+ with open(self.filename, 'rb') as f:
+ self.assertEqual(self.decompress(f.read()), self.TEXT * 2)
+
def testSeekForward(self):
- # "Test BZ2File.seek(150, 0)"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
self.assertRaises(TypeError, bz2f.seek)
bz2f.seek(150)
self.assertEqual(bz2f.read(), self.TEXT[150:])
+ def testSeekForwardAcrossStreams(self):
+ self.createTempFile(streams=2)
+ with BZ2File(self.filename) as bz2f:
+ self.assertRaises(TypeError, bz2f.seek)
+ bz2f.seek(len(self.TEXT) + 150)
+ self.assertEqual(bz2f.read(), self.TEXT[150:])
+
def testSeekBackwards(self):
- # "Test BZ2File.seek(-150, 1)"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
bz2f.read(500)
bz2f.seek(-150, 1)
self.assertEqual(bz2f.read(), self.TEXT[500-150:])
+ def testSeekBackwardsAcrossStreams(self):
+ self.createTempFile(streams=2)
+ with BZ2File(self.filename) as bz2f:
+ readto = len(self.TEXT) + 100
+ while readto > 0:
+ readto -= len(bz2f.read(readto))
+ bz2f.seek(-150, 1)
+ self.assertEqual(bz2f.read(), self.TEXT[100-150:] + self.TEXT)
+
def testSeekBackwardsFromEnd(self):
- # "Test BZ2File.seek(-150, 2)"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
bz2f.seek(-150, 2)
self.assertEqual(bz2f.read(), self.TEXT[len(self.TEXT)-150:])
+ def testSeekBackwardsFromEndAcrossStreams(self):
+ self.createTempFile(streams=2)
+ with BZ2File(self.filename) as bz2f:
+ bz2f.seek(-1000, 2)
+ self.assertEqual(bz2f.read(), (self.TEXT * 2)[-1000:])
+
def testSeekPostEnd(self):
- # "Test BZ2File.seek(150000)"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
bz2f.seek(150000)
self.assertEqual(bz2f.tell(), len(self.TEXT))
self.assertEqual(bz2f.read(), b"")
+ def testSeekPostEndMultiStream(self):
+ self.createTempFile(streams=5)
+ with BZ2File(self.filename) as bz2f:
+ bz2f.seek(150000)
+ self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
+ self.assertEqual(bz2f.read(), b"")
+
def testSeekPostEndTwice(self):
- # "Test BZ2File.seek(150000) twice"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
bz2f.seek(150000)
@@ -209,27 +337,109 @@ class BZ2FileTest(BaseTest):
self.assertEqual(bz2f.tell(), len(self.TEXT))
self.assertEqual(bz2f.read(), b"")
+ def testSeekPostEndTwiceMultiStream(self):
+ self.createTempFile(streams=5)
+ with BZ2File(self.filename) as bz2f:
+ bz2f.seek(150000)
+ bz2f.seek(150000)
+ self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
+ self.assertEqual(bz2f.read(), b"")
+
def testSeekPreStart(self):
- # "Test BZ2File.seek(-150, 0)"
self.createTempFile()
with BZ2File(self.filename) as bz2f:
bz2f.seek(-150)
self.assertEqual(bz2f.tell(), 0)
self.assertEqual(bz2f.read(), self.TEXT)
+ def testSeekPreStartMultiStream(self):
+ self.createTempFile(streams=2)
+ with BZ2File(self.filename) as bz2f:
+ bz2f.seek(-150)
+ self.assertEqual(bz2f.tell(), 0)
+ self.assertEqual(bz2f.read(), self.TEXT * 2)
+
+ def testFileno(self):
+ self.createTempFile()
+ with open(self.filename, 'rb') as rawf:
+ bz2f = BZ2File(rawf)
+ try:
+ self.assertEqual(bz2f.fileno(), rawf.fileno())
+ finally:
+ bz2f.close()
+ self.assertRaises(ValueError, bz2f.fileno)
+
+ def testSeekable(self):
+ bz2f = BZ2File(BytesIO(self.DATA))
+ try:
+ self.assertTrue(bz2f.seekable())
+ bz2f.read()
+ self.assertTrue(bz2f.seekable())
+ finally:
+ bz2f.close()
+ self.assertRaises(ValueError, bz2f.seekable)
+
+ bz2f = BZ2File(BytesIO(), mode="w")
+ try:
+ self.assertFalse(bz2f.seekable())
+ finally:
+ bz2f.close()
+ self.assertRaises(ValueError, bz2f.seekable)
+
+ src = BytesIO(self.DATA)
+ src.seekable = lambda: False
+ bz2f = BZ2File(src)
+ try:
+ self.assertFalse(bz2f.seekable())
+ finally:
+ bz2f.close()
+ self.assertRaises(ValueError, bz2f.seekable)
+
+ def testReadable(self):
+ bz2f = BZ2File(BytesIO(self.DATA))
+ try:
+ self.assertTrue(bz2f.readable())
+ bz2f.read()
+ self.assertTrue(bz2f.readable())
+ finally:
+ bz2f.close()
+ self.assertRaises(ValueError, bz2f.readable)
+
+ bz2f = BZ2File(BytesIO(), mode="w")
+ try:
+ self.assertFalse(bz2f.readable())
+ finally:
+ bz2f.close()
+ self.assertRaises(ValueError, bz2f.readable)
+
+ def testWritable(self):
+ bz2f = BZ2File(BytesIO(self.DATA))
+ try:
+ self.assertFalse(bz2f.writable())
+ bz2f.read()
+ self.assertFalse(bz2f.writable())
+ finally:
+ bz2f.close()
+ self.assertRaises(ValueError, bz2f.writable)
+
+ bz2f = BZ2File(BytesIO(), mode="w")
+ try:
+ self.assertTrue(bz2f.writable())
+ finally:
+ bz2f.close()
+ self.assertRaises(ValueError, bz2f.writable)
+
def testOpenDel(self):
- # "Test opening and deleting a file many times"
self.createTempFile()
for i in range(10000):
o = BZ2File(self.filename)
del o
def testOpenNonexistent(self):
- # "Test opening a nonexistent file"
self.assertRaises(IOError, BZ2File, "/non/existent")
- def testBug1191043(self):
- # readlines() for files containing no newline
+ def testReadlinesNoNewline(self):
+ # Issue #1191043: readlines() fails on a file containing no newline.
data = b'BZh91AY&SY\xd9b\x89]\x00\x00\x00\x03\x80\x04\x00\x02\x00\x0c\x00 \x00!\x9ah3M\x13<]\xc9\x14\xe1BCe\x8a%t'
with open(self.filename, "wb") as f:
f.write(data)
@@ -241,7 +451,6 @@ class BZ2FileTest(BaseTest):
self.assertEqual(xlines, [b'Test'])
def testContextProtocol(self):
- # BZ2File supports the context management protocol
f = None
with BZ2File(self.filename, "wb") as f:
f.write(b"xxx")
@@ -264,7 +473,7 @@ class BZ2FileTest(BaseTest):
@unittest.skipUnless(threading, 'Threading required for this test.')
def testThreading(self):
- # Using a BZ2File from several threads doesn't deadlock (issue #7205).
+ # Issue #7205: Using a BZ2File from several threads shouldn't deadlock.
data = b"1" * 2**20
nthreads = 10
with bz2.BZ2File(self.filename, 'wb') as f:
@@ -277,22 +486,98 @@ class BZ2FileTest(BaseTest):
for t in threads:
t.join()
- def testMixedIterationReads(self):
- # Issue #8397: mixed iteration and reads should be forbidden.
- with bz2.BZ2File(self.filename, 'wb') as f:
- # The internal buffer size is hard-wired to 8192 bytes, we must
- # write out more than that for the test to stop half through
- # the buffer.
- f.write(self.TEXT * 100)
- with bz2.BZ2File(self.filename, 'rb') as f:
- next(f)
- self.assertRaises(ValueError, f.read)
- self.assertRaises(ValueError, f.readline)
- self.assertRaises(ValueError, f.readlines)
+ def testWithoutThreading(self):
+ bz2 = support.import_fresh_module("bz2", blocked=("threading",))
+ with bz2.BZ2File(self.filename, "wb") as f:
+ f.write(b"abc")
+ with bz2.BZ2File(self.filename, "rb") as f:
+ self.assertEqual(f.read(), b"abc")
+
+ def testMixedIterationAndReads(self):
+ self.createTempFile()
+ linelen = len(self.TEXT_LINES[0])
+ halflen = linelen // 2
+ with bz2.BZ2File(self.filename) as bz2f:
+ bz2f.read(halflen)
+ self.assertEqual(next(bz2f), self.TEXT_LINES[0][halflen:])
+ self.assertEqual(bz2f.read(), self.TEXT[linelen:])
+ with bz2.BZ2File(self.filename) as bz2f:
+ bz2f.readline()
+ self.assertEqual(next(bz2f), self.TEXT_LINES[1])
+ self.assertEqual(bz2f.readline(), self.TEXT_LINES[2])
+ with bz2.BZ2File(self.filename) as bz2f:
+ bz2f.readlines()
+ with self.assertRaises(StopIteration):
+ next(bz2f)
+ self.assertEqual(bz2f.readlines(), [])
+
+ def testMultiStreamOrdering(self):
+ # Test the ordering of streams when reading a multi-stream archive.
+ data1 = b"foo" * 1000
+ data2 = b"bar" * 1000
+ with BZ2File(self.filename, "w") as bz2f:
+ bz2f.write(data1)
+ with BZ2File(self.filename, "a") as bz2f:
+ bz2f.write(data2)
+ with BZ2File(self.filename) as bz2f:
+ self.assertEqual(bz2f.read(), data1 + data2)
+
+ def testOpenBytesFilename(self):
+ str_filename = self.filename
+ try:
+ bytes_filename = str_filename.encode("ascii")
+ except UnicodeEncodeError:
+ self.skipTest("Temporary file name needs to be ASCII")
+ with BZ2File(bytes_filename, "wb") as f:
+ f.write(self.DATA)
+ with BZ2File(bytes_filename, "rb") as f:
+ self.assertEqual(f.read(), self.DATA)
+ # Sanity check that we are actually operating on the right file.
+ with BZ2File(str_filename, "rb") as f:
+ self.assertEqual(f.read(), self.DATA)
+
+
+ # Tests for a BZ2File wrapping another file object:
+
+ def testReadBytesIO(self):
+ with BytesIO(self.DATA) as bio:
+ with BZ2File(bio) as bz2f:
+ self.assertRaises(TypeError, bz2f.read, None)
+ self.assertEqual(bz2f.read(), self.TEXT)
+ self.assertFalse(bio.closed)
+
+ def testPeekBytesIO(self):
+ with BytesIO(self.DATA) as bio:
+ with BZ2File(bio) as bz2f:
+ pdata = bz2f.peek()
+ self.assertNotEqual(len(pdata), 0)
+ self.assertTrue(self.TEXT.startswith(pdata))
+ self.assertEqual(bz2f.read(), self.TEXT)
+
+ def testWriteBytesIO(self):
+ with BytesIO() as bio:
+ with BZ2File(bio, "w") as bz2f:
+ self.assertRaises(TypeError, bz2f.write)
+ bz2f.write(self.TEXT)
+ self.assertEqual(self.decompress(bio.getvalue()), self.TEXT)
+ self.assertFalse(bio.closed)
+
+ def testSeekForwardBytesIO(self):
+ with BytesIO(self.DATA) as bio:
+ with BZ2File(bio) as bz2f:
+ self.assertRaises(TypeError, bz2f.seek)
+ bz2f.seek(150)
+ self.assertEqual(bz2f.read(), self.TEXT[150:])
+
+ def testSeekBackwardsBytesIO(self):
+ with BytesIO(self.DATA) as bio:
+ with BZ2File(bio) as bz2f:
+ bz2f.read(500)
+ bz2f.seek(-150, 1)
+ self.assertEqual(bz2f.read(), self.TEXT[500-150:])
class BZ2CompressorTest(BaseTest):
def testCompress(self):
- # "Test BZ2Compressor.compress()/flush()"
bz2c = BZ2Compressor()
self.assertRaises(TypeError, bz2c.compress)
data = bz2c.compress(self.TEXT)
@@ -300,11 +585,10 @@ class BZ2CompressorTest(BaseTest):
self.assertEqual(self.decompress(data), self.TEXT)
def testCompressChunks10(self):
- # "Test BZ2Compressor.compress()/flush() with chunks of 10 bytes"
bz2c = BZ2Compressor()
n = 0
data = b''
- while 1:
+ while True:
str = self.TEXT[n*10:(n+1)*10]
if not str:
break
@@ -313,23 +597,38 @@ class BZ2CompressorTest(BaseTest):
data += bz2c.flush()
self.assertEqual(self.decompress(data), self.TEXT)
+ @bigmemtest(size=_4G + 100, memuse=2)
+ def testCompress4G(self, size):
+ # "Test BZ2Compressor.compress()/flush() with >4GiB input"
+ bz2c = BZ2Compressor()
+ data = b"x" * size
+ try:
+ compressed = bz2c.compress(data)
+ compressed += bz2c.flush()
+ finally:
+ data = None # Release memory
+ data = bz2.decompress(compressed)
+ try:
+ self.assertEqual(len(data), size)
+ self.assertEqual(len(data.strip(b"x")), 0)
+ finally:
+ data = None
+
class BZ2DecompressorTest(BaseTest):
def test_Constructor(self):
self.assertRaises(TypeError, BZ2Decompressor, 42)
def testDecompress(self):
- # "Test BZ2Decompressor.decompress()"
bz2d = BZ2Decompressor()
self.assertRaises(TypeError, bz2d.decompress)
text = bz2d.decompress(self.DATA)
self.assertEqual(text, self.TEXT)
def testDecompressChunks10(self):
- # "Test BZ2Decompressor.decompress() with chunks of 10 bytes"
bz2d = BZ2Decompressor()
text = b''
n = 0
- while 1:
+ while True:
str = self.DATA[n*10:(n+1)*10]
if not str:
break
@@ -338,7 +637,6 @@ class BZ2DecompressorTest(BaseTest):
self.assertEqual(text, self.TEXT)
def testDecompressUnusedData(self):
- # "Test BZ2Decompressor.decompress() with unused data"
bz2d = BZ2Decompressor()
unused_data = b"this is unused data"
text = bz2d.decompress(self.DATA+unused_data)
@@ -346,44 +644,152 @@ class BZ2DecompressorTest(BaseTest):
self.assertEqual(bz2d.unused_data, unused_data)
def testEOFError(self):
- # "Calling BZ2Decompressor.decompress() after EOS must raise EOFError"
bz2d = BZ2Decompressor()
text = bz2d.decompress(self.DATA)
self.assertRaises(EOFError, bz2d.decompress, b"anything")
-
-class FuncTest(BaseTest):
- "Test module functions"
-
+ @bigmemtest(size=_4G + 100, memuse=3)
+ def testDecompress4G(self, size):
+ # "Test BZ2Decompressor.decompress() with >4GiB input"
+ blocksize = 10 * 1024 * 1024
+ block = random.getrandbits(blocksize * 8).to_bytes(blocksize, 'little')
+ try:
+ data = block * (size // blocksize + 1)
+ compressed = bz2.compress(data)
+ bz2d = BZ2Decompressor()
+ decompressed = bz2d.decompress(compressed)
+ self.assertTrue(decompressed == data)
+ finally:
+ data = None
+ compressed = None
+ decompressed = None
+
+
+class CompressDecompressTest(BaseTest):
def testCompress(self):
- # "Test compress() function"
data = bz2.compress(self.TEXT)
self.assertEqual(self.decompress(data), self.TEXT)
def testDecompress(self):
- # "Test decompress() function"
text = bz2.decompress(self.DATA)
self.assertEqual(text, self.TEXT)
def testDecompressEmpty(self):
- # "Test decompress() function with empty string"
text = bz2.decompress(b"")
self.assertEqual(text, b"")
def testDecompressIncomplete(self):
- # "Test decompress() function with incomplete data"
self.assertRaises(ValueError, bz2.decompress, self.DATA[:-10])
+ def testDecompressMultiStream(self):
+ text = bz2.decompress(self.DATA * 5)
+ self.assertEqual(text, self.TEXT * 5)
+
+
+class OpenTest(BaseTest):
+ def test_binary_modes(self):
+ with bz2.open(self.filename, "wb") as f:
+ f.write(self.TEXT)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read())
+ self.assertEqual(file_data, self.TEXT)
+ with bz2.open(self.filename, "rb") as f:
+ self.assertEqual(f.read(), self.TEXT)
+ with bz2.open(self.filename, "ab") as f:
+ f.write(self.TEXT)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read())
+ self.assertEqual(file_data, self.TEXT * 2)
+
+ def test_implicit_binary_modes(self):
+ # Test implicit binary modes (no "b" or "t" in mode string).
+ with bz2.open(self.filename, "w") as f:
+ f.write(self.TEXT)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read())
+ self.assertEqual(file_data, self.TEXT)
+ with bz2.open(self.filename, "r") as f:
+ self.assertEqual(f.read(), self.TEXT)
+ with bz2.open(self.filename, "a") as f:
+ f.write(self.TEXT)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read())
+ self.assertEqual(file_data, self.TEXT * 2)
+
+ def test_text_modes(self):
+ text = self.TEXT.decode("ascii")
+ text_native_eol = text.replace("\n", os.linesep)
+ with bz2.open(self.filename, "wt") as f:
+ f.write(text)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read()).decode("ascii")
+ self.assertEqual(file_data, text_native_eol)
+ with bz2.open(self.filename, "rt") as f:
+ self.assertEqual(f.read(), text)
+ with bz2.open(self.filename, "at") as f:
+ f.write(text)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read()).decode("ascii")
+ self.assertEqual(file_data, text_native_eol * 2)
+
+ def test_fileobj(self):
+ with bz2.open(BytesIO(self.DATA), "r") as f:
+ self.assertEqual(f.read(), self.TEXT)
+ with bz2.open(BytesIO(self.DATA), "rb") as f:
+ self.assertEqual(f.read(), self.TEXT)
+ text = self.TEXT.decode("ascii")
+ with bz2.open(BytesIO(self.DATA), "rt") as f:
+ self.assertEqual(f.read(), text)
+
+ def test_bad_params(self):
+ # Test invalid parameter combinations.
+ with self.assertRaises(ValueError):
+ bz2.open(self.filename, "wbt")
+ with self.assertRaises(ValueError):
+ bz2.open(self.filename, "rb", encoding="utf-8")
+ with self.assertRaises(ValueError):
+ bz2.open(self.filename, "rb", errors="ignore")
+ with self.assertRaises(ValueError):
+ bz2.open(self.filename, "rb", newline="\n")
+
+ def test_encoding(self):
+ # Test non-default encoding.
+ text = self.TEXT.decode("ascii")
+ text_native_eol = text.replace("\n", os.linesep)
+ with bz2.open(self.filename, "wt", encoding="utf-16-le") as f:
+ f.write(text)
+ with open(self.filename, "rb") as f:
+ file_data = bz2.decompress(f.read()).decode("utf-16-le")
+ self.assertEqual(file_data, text_native_eol)
+ with bz2.open(self.filename, "rt", encoding="utf-16-le") as f:
+ self.assertEqual(f.read(), text)
+
+ def test_encoding_error_handler(self):
+ # Test with non-default encoding error handler.
+ with bz2.open(self.filename, "wb") as f:
+ f.write(b"foo\xffbar")
+ with bz2.open(self.filename, "rt", encoding="ascii", errors="ignore") \
+ as f:
+ self.assertEqual(f.read(), "foobar")
+
+ def test_newline(self):
+ # Test with explicit newline (universal newline mode disabled).
+ text = self.TEXT.decode("ascii")
+ with bz2.open(self.filename, "wt", newline="\n") as f:
+ f.write(text)
+ with bz2.open(self.filename, "rt", newline="\r") as f:
+ self.assertEqual(f.readlines(), [text])
+
+
def test_main():
support.run_unittest(
BZ2FileTest,
BZ2CompressorTest,
BZ2DecompressorTest,
- FuncTest
+ CompressDecompressTest,
+ OpenTest,
)
support.reap_children()
if __name__ == '__main__':
test_main()
-
-# vim:ts=4:sw=4
diff --git a/Lib/test/test_calendar.py b/Lib/test/test_calendar.py
index 948a119..e594e01 100644
--- a/Lib/test/test_calendar.py
+++ b/Lib/test/test_calendar.py
@@ -5,8 +5,19 @@ from test import support
from test.script_helper import assert_python_ok
import time
import locale
+import sys
import datetime
+result_2004_01_text = """
+ January 2004
+Mo Tu We Th Fr Sa Su
+ 1 2 3 4
+ 5 6 7 8 9 10 11
+12 13 14 15 16 17 18
+19 20 21 22 23 24 25
+26 27 28 29 30 31
+"""
+
result_2004_text = """
2004
@@ -46,11 +57,11 @@ Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su
"""
result_2004_html = """
-<?xml version="1.0" encoding="ascii"?>
+<?xml version="1.0" encoding="%(e)s"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
-<meta http-equiv="Content-Type" content="text/html; charset=ascii" />
+<meta http-equiv="Content-Type" content="text/html; charset=%(e)s" />
<link rel="stylesheet" type="text/css" href="calendar.css" />
<title>Calendar for 2004</title>
</head>
@@ -170,6 +181,135 @@ result_2004_html = """
</html>
"""
+result_2004_days = [
+ [[[0, 0, 0, 1, 2, 3, 4],
+ [5, 6, 7, 8, 9, 10, 11],
+ [12, 13, 14, 15, 16, 17, 18],
+ [19, 20, 21, 22, 23, 24, 25],
+ [26, 27, 28, 29, 30, 31, 0]],
+ [[0, 0, 0, 0, 0, 0, 1],
+ [2, 3, 4, 5, 6, 7, 8],
+ [9, 10, 11, 12, 13, 14, 15],
+ [16, 17, 18, 19, 20, 21, 22],
+ [23, 24, 25, 26, 27, 28, 29]],
+ [[1, 2, 3, 4, 5, 6, 7],
+ [8, 9, 10, 11, 12, 13, 14],
+ [15, 16, 17, 18, 19, 20, 21],
+ [22, 23, 24, 25, 26, 27, 28],
+ [29, 30, 31, 0, 0, 0, 0]]],
+ [[[0, 0, 0, 1, 2, 3, 4],
+ [5, 6, 7, 8, 9, 10, 11],
+ [12, 13, 14, 15, 16, 17, 18],
+ [19, 20, 21, 22, 23, 24, 25],
+ [26, 27, 28, 29, 30, 0, 0]],
+ [[0, 0, 0, 0, 0, 1, 2],
+ [3, 4, 5, 6, 7, 8, 9],
+ [10, 11, 12, 13, 14, 15, 16],
+ [17, 18, 19, 20, 21, 22, 23],
+ [24, 25, 26, 27, 28, 29, 30],
+ [31, 0, 0, 0, 0, 0, 0]],
+ [[0, 1, 2, 3, 4, 5, 6],
+ [7, 8, 9, 10, 11, 12, 13],
+ [14, 15, 16, 17, 18, 19, 20],
+ [21, 22, 23, 24, 25, 26, 27],
+ [28, 29, 30, 0, 0, 0, 0]]],
+ [[[0, 0, 0, 1, 2, 3, 4],
+ [5, 6, 7, 8, 9, 10, 11],
+ [12, 13, 14, 15, 16, 17, 18],
+ [19, 20, 21, 22, 23, 24, 25],
+ [26, 27, 28, 29, 30, 31, 0]],
+ [[0, 0, 0, 0, 0, 0, 1],
+ [2, 3, 4, 5, 6, 7, 8],
+ [9, 10, 11, 12, 13, 14, 15],
+ [16, 17, 18, 19, 20, 21, 22],
+ [23, 24, 25, 26, 27, 28, 29],
+ [30, 31, 0, 0, 0, 0, 0]],
+ [[0, 0, 1, 2, 3, 4, 5],
+ [6, 7, 8, 9, 10, 11, 12],
+ [13, 14, 15, 16, 17, 18, 19],
+ [20, 21, 22, 23, 24, 25, 26],
+ [27, 28, 29, 30, 0, 0, 0]]],
+ [[[0, 0, 0, 0, 1, 2, 3],
+ [4, 5, 6, 7, 8, 9, 10],
+ [11, 12, 13, 14, 15, 16, 17],
+ [18, 19, 20, 21, 22, 23, 24],
+ [25, 26, 27, 28, 29, 30, 31]],
+ [[1, 2, 3, 4, 5, 6, 7],
+ [8, 9, 10, 11, 12, 13, 14],
+ [15, 16, 17, 18, 19, 20, 21],
+ [22, 23, 24, 25, 26, 27, 28],
+ [29, 30, 0, 0, 0, 0, 0]],
+ [[0, 0, 1, 2, 3, 4, 5],
+ [6, 7, 8, 9, 10, 11, 12],
+ [13, 14, 15, 16, 17, 18, 19],
+ [20, 21, 22, 23, 24, 25, 26],
+ [27, 28, 29, 30, 31, 0, 0]]]
+]
+
+result_2004_dates = \
+ [[['12/29/03 12/30/03 12/31/03 01/01/04 01/02/04 01/03/04 01/04/04',
+ '01/05/04 01/06/04 01/07/04 01/08/04 01/09/04 01/10/04 01/11/04',
+ '01/12/04 01/13/04 01/14/04 01/15/04 01/16/04 01/17/04 01/18/04',
+ '01/19/04 01/20/04 01/21/04 01/22/04 01/23/04 01/24/04 01/25/04',
+ '01/26/04 01/27/04 01/28/04 01/29/04 01/30/04 01/31/04 02/01/04'],
+ ['01/26/04 01/27/04 01/28/04 01/29/04 01/30/04 01/31/04 02/01/04',
+ '02/02/04 02/03/04 02/04/04 02/05/04 02/06/04 02/07/04 02/08/04',
+ '02/09/04 02/10/04 02/11/04 02/12/04 02/13/04 02/14/04 02/15/04',
+ '02/16/04 02/17/04 02/18/04 02/19/04 02/20/04 02/21/04 02/22/04',
+ '02/23/04 02/24/04 02/25/04 02/26/04 02/27/04 02/28/04 02/29/04'],
+ ['03/01/04 03/02/04 03/03/04 03/04/04 03/05/04 03/06/04 03/07/04',
+ '03/08/04 03/09/04 03/10/04 03/11/04 03/12/04 03/13/04 03/14/04',
+ '03/15/04 03/16/04 03/17/04 03/18/04 03/19/04 03/20/04 03/21/04',
+ '03/22/04 03/23/04 03/24/04 03/25/04 03/26/04 03/27/04 03/28/04',
+ '03/29/04 03/30/04 03/31/04 04/01/04 04/02/04 04/03/04 04/04/04']],
+ [['03/29/04 03/30/04 03/31/04 04/01/04 04/02/04 04/03/04 04/04/04',
+ '04/05/04 04/06/04 04/07/04 04/08/04 04/09/04 04/10/04 04/11/04',
+ '04/12/04 04/13/04 04/14/04 04/15/04 04/16/04 04/17/04 04/18/04',
+ '04/19/04 04/20/04 04/21/04 04/22/04 04/23/04 04/24/04 04/25/04',
+ '04/26/04 04/27/04 04/28/04 04/29/04 04/30/04 05/01/04 05/02/04'],
+ ['04/26/04 04/27/04 04/28/04 04/29/04 04/30/04 05/01/04 05/02/04',
+ '05/03/04 05/04/04 05/05/04 05/06/04 05/07/04 05/08/04 05/09/04',
+ '05/10/04 05/11/04 05/12/04 05/13/04 05/14/04 05/15/04 05/16/04',
+ '05/17/04 05/18/04 05/19/04 05/20/04 05/21/04 05/22/04 05/23/04',
+ '05/24/04 05/25/04 05/26/04 05/27/04 05/28/04 05/29/04 05/30/04',
+ '05/31/04 06/01/04 06/02/04 06/03/04 06/04/04 06/05/04 06/06/04'],
+ ['05/31/04 06/01/04 06/02/04 06/03/04 06/04/04 06/05/04 06/06/04',
+ '06/07/04 06/08/04 06/09/04 06/10/04 06/11/04 06/12/04 06/13/04',
+ '06/14/04 06/15/04 06/16/04 06/17/04 06/18/04 06/19/04 06/20/04',
+ '06/21/04 06/22/04 06/23/04 06/24/04 06/25/04 06/26/04 06/27/04',
+ '06/28/04 06/29/04 06/30/04 07/01/04 07/02/04 07/03/04 07/04/04']],
+ [['06/28/04 06/29/04 06/30/04 07/01/04 07/02/04 07/03/04 07/04/04',
+ '07/05/04 07/06/04 07/07/04 07/08/04 07/09/04 07/10/04 07/11/04',
+ '07/12/04 07/13/04 07/14/04 07/15/04 07/16/04 07/17/04 07/18/04',
+ '07/19/04 07/20/04 07/21/04 07/22/04 07/23/04 07/24/04 07/25/04',
+ '07/26/04 07/27/04 07/28/04 07/29/04 07/30/04 07/31/04 08/01/04'],
+ ['07/26/04 07/27/04 07/28/04 07/29/04 07/30/04 07/31/04 08/01/04',
+ '08/02/04 08/03/04 08/04/04 08/05/04 08/06/04 08/07/04 08/08/04',
+ '08/09/04 08/10/04 08/11/04 08/12/04 08/13/04 08/14/04 08/15/04',
+ '08/16/04 08/17/04 08/18/04 08/19/04 08/20/04 08/21/04 08/22/04',
+ '08/23/04 08/24/04 08/25/04 08/26/04 08/27/04 08/28/04 08/29/04',
+ '08/30/04 08/31/04 09/01/04 09/02/04 09/03/04 09/04/04 09/05/04'],
+ ['08/30/04 08/31/04 09/01/04 09/02/04 09/03/04 09/04/04 09/05/04',
+ '09/06/04 09/07/04 09/08/04 09/09/04 09/10/04 09/11/04 09/12/04',
+ '09/13/04 09/14/04 09/15/04 09/16/04 09/17/04 09/18/04 09/19/04',
+ '09/20/04 09/21/04 09/22/04 09/23/04 09/24/04 09/25/04 09/26/04',
+ '09/27/04 09/28/04 09/29/04 09/30/04 10/01/04 10/02/04 10/03/04']],
+ [['09/27/04 09/28/04 09/29/04 09/30/04 10/01/04 10/02/04 10/03/04',
+ '10/04/04 10/05/04 10/06/04 10/07/04 10/08/04 10/09/04 10/10/04',
+ '10/11/04 10/12/04 10/13/04 10/14/04 10/15/04 10/16/04 10/17/04',
+ '10/18/04 10/19/04 10/20/04 10/21/04 10/22/04 10/23/04 10/24/04',
+ '10/25/04 10/26/04 10/27/04 10/28/04 10/29/04 10/30/04 10/31/04'],
+ ['11/01/04 11/02/04 11/03/04 11/04/04 11/05/04 11/06/04 11/07/04',
+ '11/08/04 11/09/04 11/10/04 11/11/04 11/12/04 11/13/04 11/14/04',
+ '11/15/04 11/16/04 11/17/04 11/18/04 11/19/04 11/20/04 11/21/04',
+ '11/22/04 11/23/04 11/24/04 11/25/04 11/26/04 11/27/04 11/28/04',
+ '11/29/04 11/30/04 12/01/04 12/02/04 12/03/04 12/04/04 12/05/04'],
+ ['11/29/04 11/30/04 12/01/04 12/02/04 12/03/04 12/04/04 12/05/04',
+ '12/06/04 12/07/04 12/08/04 12/09/04 12/10/04 12/11/04 12/12/04',
+ '12/13/04 12/14/04 12/15/04 12/16/04 12/17/04 12/18/04 12/19/04',
+ '12/20/04 12/21/04 12/22/04 12/23/04 12/24/04 12/25/04 12/26/04',
+ '12/27/04 12/28/04 12/29/04 12/30/04 12/31/04 01/01/05 01/02/05']]]
+
class OutputTestCase(unittest.TestCase):
def normalize_calendar(self, s):
@@ -178,12 +318,19 @@ class OutputTestCase(unittest.TestCase):
return not c.isspace() and not c.isdigit()
lines = []
- for line in s.splitlines(False):
+ for line in s.splitlines(keepends=False):
# Drop texts, as they are locale dependent
if line and not filter(neitherspacenordigit, line):
lines.append(line)
return lines
+ def check_htmlcalendar_encoding(self, req, res):
+ cal = calendar.HTMLCalendar()
+ self.assertEqual(
+ cal.formatyearpage(2004, encoding=req).strip(b' \t\n'),
+ (result_2004_html % {'e': res}).strip(' \t\n').encode(res)
+ )
+
def test_output(self):
self.assertEqual(
self.normalize_calendar(calendar.calendar(2004)),
@@ -196,12 +343,60 @@ class OutputTestCase(unittest.TestCase):
result_2004_text.strip()
)
- def test_output_htmlcalendar(self):
- encoding = 'ascii'
- cal = calendar.HTMLCalendar()
+ def test_output_htmlcalendar_encoding_ascii(self):
+ self.check_htmlcalendar_encoding('ascii', 'ascii')
+
+ def test_output_htmlcalendar_encoding_utf8(self):
+ self.check_htmlcalendar_encoding('utf-8', 'utf-8')
+
+ def test_output_htmlcalendar_encoding_default(self):
+ self.check_htmlcalendar_encoding(None, sys.getdefaultencoding())
+
+ def test_yeardatescalendar(self):
+ def shrink(cal):
+ return [[[' '.join('{:02d}/{:02d}/{}'.format(
+ d.month, d.day, str(d.year)[-2:]) for d in z)
+ for z in y] for y in x] for x in cal]
+ self.assertEqual(
+ shrink(calendar.Calendar().yeardatescalendar(2004)),
+ result_2004_dates
+ )
+
+ def test_yeardayscalendar(self):
self.assertEqual(
- cal.formatyearpage(2004, encoding=encoding).strip(b' \t\n'),
- result_2004_html.strip(' \t\n').encode(encoding)
+ calendar.Calendar().yeardayscalendar(2004),
+ result_2004_days
+ )
+
+ def test_formatweekheader_short(self):
+ self.assertEqual(
+ calendar.TextCalendar().formatweekheader(2),
+ 'Mo Tu We Th Fr Sa Su'
+ )
+
+ def test_formatweekheader_long(self):
+ self.assertEqual(
+ calendar.TextCalendar().formatweekheader(9),
+ ' Monday Tuesday Wednesday Thursday '
+ ' Friday Saturday Sunday '
+ )
+
+ def test_formatmonth(self):
+ self.assertEqual(
+ calendar.TextCalendar().formatmonth(2004, 1).strip(),
+ result_2004_01_text.strip()
+ )
+
+ def test_formatmonthname_with_year(self):
+ self.assertEqual(
+ calendar.HTMLCalendar().formatmonthname(2004, 1, withyear=True),
+ '<tr><th colspan="7" class="month">January 2004</th></tr>'
+ )
+
+ def test_formatmonthname_without_year(self):
+ self.assertEqual(
+ calendar.HTMLCalendar().formatmonthname(2004, 1, withyear=False),
+ '<tr><th colspan="7" class="month">January</th></tr>'
)
@@ -227,7 +422,11 @@ class CalendarTestCase(unittest.TestCase):
self.assertEqual(calendar.firstweekday(), calendar.MONDAY)
calendar.setfirstweekday(orig)
- def test_enumerateweekdays(self):
+ def test_illegal_weekday_reported(self):
+ with self.assertRaisesRegex(calendar.IllegalWeekdayError, '123'):
+ calendar.setfirstweekday(123)
+
+ def test_enumerate_weekdays(self):
self.assertRaises(IndexError, calendar.day_abbr.__getitem__, -10)
self.assertRaises(IndexError, calendar.day_name.__getitem__, 10)
self.assertEqual(len([d for d in calendar.day_abbr]), 7)
@@ -253,7 +452,7 @@ class CalendarTestCase(unittest.TestCase):
# verify it "acts like a sequence" in two forms of iteration
self.assertEqual(value[::-1], list(reversed(value)))
- def test_localecalendars(self):
+ def test_locale_calendars(self):
# ensure that Locale{Text,HTML}Calendar resets the locale properly
# (it is still not thread-safe though)
old_october = calendar.TextCalendar().formatmonthname(2010, 10, 10)
@@ -437,6 +636,10 @@ class MonthRangeTestCase(unittest.TestCase):
with self.assertRaises(calendar.IllegalMonthError):
calendar.monthrange(2004, 13)
+ def test_illegal_month_reported(self):
+ with self.assertRaisesRegex(calendar.IllegalMonthError, '65'):
+ calendar.monthrange(2004, 65)
+
class LeapdaysTestCase(unittest.TestCase):
def test_no_range(self):
# test when no range i.e. two identical years as args
diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py
index 4d931f8..af15a3d 100644
--- a/Lib/test/test_capi.py
+++ b/Lib/test/test_capi.py
@@ -52,13 +52,36 @@ class CAPITest(unittest.TestCase):
(out, err) = p.communicate()
self.assertEqual(out, b'')
# This used to cause an infinite loop.
- self.assertEqual(err.rstrip(),
+ self.assertTrue(err.rstrip().startswith(
b'Fatal Python error:'
- b' PyThreadState_Get: no current thread')
+ b' PyThreadState_Get: no current thread'))
def test_memoryview_from_NULL_pointer(self):
self.assertRaises(ValueError, _testcapi.make_memoryview_from_NULL_pointer)
+ def test_exc_info(self):
+ raised_exception = ValueError("5")
+ new_exc = TypeError("TEST")
+ try:
+ raise raised_exception
+ except ValueError as e:
+ tb = e.__traceback__
+ orig_sys_exc_info = sys.exc_info()
+ orig_exc_info = _testcapi.set_exc_info(new_exc.__class__, new_exc, None)
+ new_sys_exc_info = sys.exc_info()
+ new_exc_info = _testcapi.set_exc_info(*orig_exc_info)
+ reset_sys_exc_info = sys.exc_info()
+
+ self.assertEqual(orig_exc_info[1], e)
+
+ self.assertSequenceEqual(orig_exc_info, (raised_exception.__class__, raised_exception, tb))
+ self.assertSequenceEqual(orig_sys_exc_info, orig_exc_info)
+ self.assertSequenceEqual(reset_sys_exc_info, orig_exc_info)
+ self.assertSequenceEqual(new_exc_info, (new_exc.__class__, new_exc, None))
+ self.assertSequenceEqual(new_sys_exc_info, new_exc_info)
+ else:
+ self.assertTrue(False)
+
@unittest.skipUnless(_posixsubprocess, '_posixsubprocess required for this test.')
def test_seq_bytes_to_charp_array(self):
# Issue #15732: crash in _PySequence_BytesToCharpArray()
@@ -222,9 +245,91 @@ class EmbeddingTest(unittest.TestCase):
finally:
os.chdir(oldcwd)
+class SkipitemTest(unittest.TestCase):
+
+ def test_skipitem(self):
+ """
+ If this test failed, you probably added a new "format unit"
+ in Python/getargs.c, but neglected to update our poor friend
+ skipitem() in the same file. (If so, shame on you!)
+
+ With a few exceptions**, this function brute-force tests all
+ printable ASCII*** characters (32 to 126 inclusive) as format units,
+ checking to see that PyArg_ParseTupleAndKeywords() return consistent
+ errors both when the unit is attempted to be used and when it is
+ skipped. If the format unit doesn't exist, we'll get one of two
+ specific error messages (one for used, one for skipped); if it does
+ exist we *won't* get that error--we'll get either no error or some
+ other error. If we get the specific "does not exist" error for one
+ test and not for the other, there's a mismatch, and the test fails.
+
+ ** Some format units have special funny semantics and it would
+ be difficult to accomodate them here. Since these are all
+ well-established and properly skipped in skipitem() we can
+ get away with not testing them--this test is really intended
+ to catch *new* format units.
+
+ *** Python C source files must be ASCII. Therefore it's impossible
+ to have non-ASCII format units.
+
+ """
+ empty_tuple = ()
+ tuple_1 = (0,)
+ dict_b = {'b':1}
+ keywords = ["a", "b"]
+
+ for i in range(32, 127):
+ c = chr(i)
+
+ # skip parentheses, the error reporting is inconsistent about them
+ # skip 'e', it's always a two-character code
+ # skip '|' and '$', they don't represent arguments anyway
+ if c in '()e|$':
+ continue
+
+ # test the format unit when not skipped
+ format = c + "i"
+ try:
+ # (note: the format string must be bytes!)
+ _testcapi.parse_tuple_and_keywords(tuple_1, dict_b,
+ format.encode("ascii"), keywords)
+ when_not_skipped = False
+ except TypeError as e:
+ s = "argument 1 must be impossible<bad format char>, not int"
+ when_not_skipped = (str(e) == s)
+ except RuntimeError as e:
+ when_not_skipped = False
+
+ # test the format unit when skipped
+ optional_format = "|" + format
+ try:
+ _testcapi.parse_tuple_and_keywords(empty_tuple, dict_b,
+ optional_format.encode("ascii"), keywords)
+ when_skipped = False
+ except RuntimeError as e:
+ s = "impossible<bad format char>: '{}'".format(format)
+ when_skipped = (str(e) == s)
+
+ message = ("test_skipitem_parity: "
+ "detected mismatch between convertsimple and skipitem "
+ "for format unit '{}' ({}), not skipped {}, skipped {}".format(
+ c, i, when_skipped, when_not_skipped))
+ self.assertIs(when_skipped, when_not_skipped, message)
+
+ def test_parse_tuple_and_keywords(self):
+ # parse_tuple_and_keywords error handling tests
+ self.assertRaises(TypeError, _testcapi.parse_tuple_and_keywords,
+ (), {}, 42, [])
+ self.assertRaises(ValueError, _testcapi.parse_tuple_and_keywords,
+ (), {}, b'', 42)
+ self.assertRaises(ValueError, _testcapi.parse_tuple_and_keywords,
+ (), {}, b'', [''] * 42)
+ self.assertRaises(ValueError, _testcapi.parse_tuple_and_keywords,
+ (), {}, b'', [42])
def test_main():
- support.run_unittest(CAPITest, TestPendingCalls, Test6012, EmbeddingTest)
+ support.run_unittest(CAPITest, TestPendingCalls,
+ Test6012, EmbeddingTest, SkipitemTest)
for name in dir(_testcapi):
if name.startswith('test_'):
@@ -241,18 +346,17 @@ def test_main():
idents = []
def callback():
- idents.append(_thread.get_ident())
+ idents.append(threading.get_ident())
_testcapi._test_thread_state(callback)
a = b = callback
time.sleep(1)
# Check our main thread is in the list exactly 3 times.
- if idents.count(_thread.get_ident()) != 3:
+ if idents.count(threading.get_ident()) != 3:
raise support.TestFailed(
"Couldn't find main thread correctly in the list")
if threading:
- import _thread
import time
TestThreadState()
t = threading.Thread(target=TestThreadState)
diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
index 3031fb3..5510a0d 100644
--- a/Lib/test/test_cgi.py
+++ b/Lib/test/test_cgi.py
@@ -4,6 +4,7 @@ import os
import sys
import tempfile
import unittest
+import warnings
from io import StringIO, BytesIO
class HackedSysModule:
@@ -119,9 +120,13 @@ def gen_result(data, environ):
class CgiTests(unittest.TestCase):
def test_escape(self):
- self.assertEqual("test &amp; string", cgi.escape("test & string"))
- self.assertEqual("&lt;test string&gt;", cgi.escape("<test string>"))
- self.assertEqual("&quot;test string&quot;", cgi.escape('"test string"', True))
+ # cgi.escape() is deprecated.
+ with warnings.catch_warnings():
+ warnings.filterwarnings('ignore', 'cgi\.escape',
+ DeprecationWarning)
+ self.assertEqual("test &amp; string", cgi.escape("test & string"))
+ self.assertEqual("&lt;test string&gt;", cgi.escape("<test string>"))
+ self.assertEqual("&quot;test string&quot;", cgi.escape('"test string"', True))
def test_strict(self):
for orig, expect in parse_strict_test_cases:
@@ -160,13 +165,7 @@ class CgiTests(unittest.TestCase):
cgi.logfp = None
cgi.logfile = "/dev/null"
cgi.initlog("%s", "Testing log 3")
- def log_cleanup():
- """Restore the global state of the log vars."""
- cgi.logfile = ''
- cgi.logfp.close()
- cgi.logfp = None
- cgi.log = cgi.initlog
- self.addCleanup(log_cleanup)
+ self.addCleanup(cgi.closelog)
cgi.log("Testing log 4")
def test_fieldstorage_readline(self):
diff --git a/Lib/test/test_cgitb.py b/Lib/test/test_cgitb.py
new file mode 100644
index 0000000..16a4b1a
--- /dev/null
+++ b/Lib/test/test_cgitb.py
@@ -0,0 +1,55 @@
+from test.support import run_unittest
+import unittest
+import sys
+import subprocess
+import cgitb
+
+class TestCgitb(unittest.TestCase):
+
+ def test_fonts(self):
+ text = "Hello Robbie!"
+ self.assertEqual(cgitb.small(text), "<small>{}</small>".format(text))
+ self.assertEqual(cgitb.strong(text), "<strong>{}</strong>".format(text))
+ self.assertEqual(cgitb.grey(text),
+ '<font color="#909090">{}</font>'.format(text))
+
+ def test_blanks(self):
+ self.assertEqual(cgitb.small(""), "")
+ self.assertEqual(cgitb.strong(""), "")
+ self.assertEqual(cgitb.grey(""), "")
+
+ def test_html(self):
+ try:
+ raise ValueError("Hello World")
+ except ValueError as err:
+ # If the html was templated we could do a bit more here.
+ # At least check that we get details on what we just raised.
+ html = cgitb.html(sys.exc_info())
+ self.assertIn("ValueError", html)
+ self.assertIn(str(err), html)
+
+ def test_text(self):
+ try:
+ raise ValueError("Hello World")
+ except ValueError as err:
+ text = cgitb.text(sys.exc_info())
+ self.assertIn("ValueError", text)
+ self.assertIn("Hello World", text)
+
+ def test_hook(self):
+ proc = subprocess.Popen([sys.executable, '-c',
+ ('import cgitb;'
+ 'cgitb.enable();'
+ 'raise ValueError("Hello World")')],
+ stdout=subprocess.PIPE)
+ out = proc.stdout.read().decode(sys.getfilesystemencoding())
+ self.addCleanup(proc.stdout.close)
+ self.assertIn("ValueError", out)
+ self.assertIn("Hello World", out)
+
+
+def test_main():
+ run_unittest(TestCgitb)
+
+if __name__ == "__main__":
+ test_main()
diff --git a/Lib/test/test_cmd.py b/Lib/test/test_cmd.py
index 3a46355..6618535 100644
--- a/Lib/test/test_cmd.py
+++ b/Lib/test/test_cmd.py
@@ -228,7 +228,7 @@ def test_main(verbose=None):
def test_coverage(coverdir):
trace = support.import_module('trace')
- tracer=trace.Trace(ignoredirs=[sys.prefix, sys.exec_prefix,],
+ tracer=trace.Trace(ignoredirs=[sys.base_prefix, sys.base_exec_prefix,],
trace=0, count=1)
tracer.run('reload(cmd);test_main()')
r=tracer.results()
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index f463af4..7644db2 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -31,12 +31,6 @@ class CmdLineTest(unittest.TestCase):
self.verify_valid_flag('-O')
self.verify_valid_flag('-OO')
- def test_q(self):
- self.verify_valid_flag('-Qold')
- self.verify_valid_flag('-Qnew')
- self.verify_valid_flag('-Qwarn')
- self.verify_valid_flag('-Qwarnall')
-
def test_site_flag(self):
self.verify_valid_flag('-S')
@@ -151,7 +145,7 @@ class CmdLineTest(unittest.TestCase):
@unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X')
def test_osx_utf8(self):
def check_output(text):
- decoded = text.decode('utf8', 'surrogateescape')
+ decoded = text.decode('utf-8', 'surrogateescape')
expected = ascii(decoded).encode('ascii') + b'\n'
env = os.environ.copy()
@@ -223,7 +217,7 @@ class CmdLineTest(unittest.TestCase):
self.assertIn(path2.encode('ascii'), out)
def test_displayhook_unencodable(self):
- for encoding in ('ascii', 'latin1', 'utf8'):
+ for encoding in ('ascii', 'latin-1', 'utf-8'):
env = os.environ.copy()
env['PYTHONIOENCODING'] = encoding
p = subprocess.Popen(
@@ -299,7 +293,7 @@ class CmdLineTest(unittest.TestCase):
rc, out, err = assert_python_ok('-c', code)
self.assertEqual(b'', out)
self.assertRegex(err.decode('ascii', 'ignore'),
- 'Exception IOError: .* ignored')
+ 'Exception OSError: .* ignored')
def test_closed_stdout(self):
# Issue #13444: if stdout has been explicitly closed, we should
@@ -353,14 +347,14 @@ class CmdLineTest(unittest.TestCase):
hashes = []
for i in range(2):
code = 'print(hash("spam"))'
- rc, out, err = assert_python_ok('-R', '-c', code)
+ rc, out, err = assert_python_ok('-c', code)
self.assertEqual(rc, 0)
hashes.append(out)
self.assertNotEqual(hashes[0], hashes[1])
# Verify that sys.flags contains hash_randomization
code = 'import sys; print("random is", sys.flags.hash_randomization)'
- rc, out, err = assert_python_ok('-R', '-c', code)
+ rc, out, err = assert_python_ok('-c', code)
self.assertEqual(rc, 0)
self.assertIn(b'random is 1', out)
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
index 6b59d96..17dfbc2 100644
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -1,15 +1,20 @@
# tests command line execution of scripts
+import importlib
+import importlib.machinery
+import zipimport
import unittest
import sys
import os
import os.path
import py_compile
+import textwrap
from test import support
from test.script_helper import (
make_pkg, make_script, make_zip_pkg, make_zip_script,
- assert_python_ok, assert_python_failure, temp_dir)
+ assert_python_ok, assert_python_failure, temp_dir,
+ spawn_python, kill_python)
verbose = support.verbose
@@ -32,6 +37,9 @@ f()
assertEqual(result, ['Top level assignment', 'Lower level reference'])
# Check population of magic variables
assertEqual(__name__, '__main__')
+from importlib.machinery import BuiltinImporter
+_loader = __loader__ if __loader__ is BuiltinImporter else type(__loader__)
+print('__loader__==%a' % _loader)
print('__file__==%a' % __file__)
assertEqual(__cached__, None)
print('__package__==%r' % __package__)
@@ -49,12 +57,16 @@ print('cwd==%a' % os.getcwd())
"""
def _make_test_script(script_dir, script_basename, source=test_source):
- return make_script(script_dir, script_basename, source)
+ to_return = make_script(script_dir, script_basename, source)
+ importlib.invalidate_caches()
+ return to_return
def _make_test_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename,
source=test_source, depth=1):
- return make_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename,
- source, depth)
+ to_return = make_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename,
+ source, depth)
+ importlib.invalidate_caches()
+ return to_return
# There's no easy way to pass the script directory in to get
# -m to work (avoiding that is the whole point of making
@@ -72,16 +84,20 @@ def _make_launch_script(script_dir, script_basename, module_name, path=None):
else:
path = repr(path)
source = launch_source % (path, module_name)
- return make_script(script_dir, script_basename, source)
+ to_return = make_script(script_dir, script_basename, source)
+ importlib.invalidate_caches()
+ return to_return
class CmdLineTest(unittest.TestCase):
def _check_output(self, script_name, exit_code, data,
expected_file, expected_argv0,
- expected_path0, expected_package):
+ expected_path0, expected_package,
+ expected_loader):
if verbose > 1:
print("Output from test script %r:" % script_name)
print(data)
self.assertEqual(exit_code, 0)
+ printed_loader = '__loader__==%a' % expected_loader
printed_file = '__file__==%a' % expected_file
printed_package = '__package__==%r' % expected_package
printed_argv0 = 'sys.argv[0]==%a' % expected_argv0
@@ -93,6 +109,7 @@ class CmdLineTest(unittest.TestCase):
print(printed_package)
print(printed_argv0)
print(printed_cwd)
+ self.assertIn(printed_loader.encode('utf-8'), data)
self.assertIn(printed_file.encode('utf-8'), data)
self.assertIn(printed_package.encode('utf-8'), data)
self.assertIn(printed_argv0.encode('utf-8'), data)
@@ -101,14 +118,15 @@ class CmdLineTest(unittest.TestCase):
def _check_script(self, script_name, expected_file,
expected_argv0, expected_path0,
- expected_package,
+ expected_package, expected_loader,
*cmd_line_switches):
if not __debug__:
cmd_line_switches += ('-' + 'O' * sys.flags.optimize,)
run_args = cmd_line_switches + (script_name,) + tuple(example_args)
rc, out, err = assert_python_ok(*run_args)
self._check_output(script_name, rc, out + err, expected_file,
- expected_argv0, expected_path0, expected_package)
+ expected_argv0, expected_path0,
+ expected_package, expected_loader)
def _check_import_error(self, script_name, expected_msg,
*cmd_line_switches):
@@ -120,11 +138,30 @@ class CmdLineTest(unittest.TestCase):
print('Expected output: %r' % expected_msg)
self.assertIn(expected_msg.encode('utf-8'), err)
+ def test_dash_c_loader(self):
+ rc, out, err = assert_python_ok("-c", "print(__loader__)")
+ expected = repr(importlib.machinery.BuiltinImporter).encode("utf-8")
+ self.assertIn(expected, out)
+
+ def test_stdin_loader(self):
+ # Unfortunately, there's no way to automatically test the fully
+ # interactive REPL, since that code path only gets executed when
+ # stdin is an interactive tty.
+ p = spawn_python()
+ try:
+ p.stdin.write(b"print(__loader__)\n")
+ p.stdin.flush()
+ finally:
+ out = kill_python(p)
+ expected = repr(importlib.machinery.BuiltinImporter).encode("utf-8")
+ self.assertIn(expected, out)
+
def test_basic_script(self):
with temp_dir() as script_dir:
script_name = _make_test_script(script_dir, 'script')
self._check_script(script_name, script_name, script_name,
- script_dir, None)
+ script_dir, None,
+ importlib.machinery.SourceFileLoader)
def test_script_compiled(self):
with temp_dir() as script_dir:
@@ -133,13 +170,15 @@ class CmdLineTest(unittest.TestCase):
os.remove(script_name)
pyc_file = support.make_legacy_pyc(script_name)
self._check_script(pyc_file, pyc_file,
- pyc_file, script_dir, None)
+ pyc_file, script_dir, None,
+ importlib.machinery.SourcelessFileLoader)
def test_directory(self):
with temp_dir() as script_dir:
script_name = _make_test_script(script_dir, '__main__')
self._check_script(script_dir, script_name, script_dir,
- script_dir, '')
+ script_dir, '',
+ importlib.machinery.SourceFileLoader)
def test_directory_compiled(self):
with temp_dir() as script_dir:
@@ -148,7 +187,8 @@ class CmdLineTest(unittest.TestCase):
os.remove(script_name)
pyc_file = support.make_legacy_pyc(script_name)
self._check_script(script_dir, pyc_file, script_dir,
- script_dir, '')
+ script_dir, '',
+ importlib.machinery.SourcelessFileLoader)
def test_directory_error(self):
with temp_dir() as script_dir:
@@ -159,14 +199,16 @@ class CmdLineTest(unittest.TestCase):
with temp_dir() as script_dir:
script_name = _make_test_script(script_dir, '__main__')
zip_name, run_name = make_zip_script(script_dir, 'test_zip', script_name)
- self._check_script(zip_name, run_name, zip_name, zip_name, '')
+ self._check_script(zip_name, run_name, zip_name, zip_name, '',
+ zipimport.zipimporter)
def test_zipfile_compiled(self):
with temp_dir() as script_dir:
script_name = _make_test_script(script_dir, '__main__')
compiled_name = py_compile.compile(script_name, doraise=True)
zip_name, run_name = make_zip_script(script_dir, 'test_zip', compiled_name)
- self._check_script(zip_name, run_name, zip_name, zip_name, '')
+ self._check_script(zip_name, run_name, zip_name, zip_name, '',
+ zipimport.zipimporter)
def test_zipfile_error(self):
with temp_dir() as script_dir:
@@ -181,19 +223,24 @@ class CmdLineTest(unittest.TestCase):
make_pkg(pkg_dir)
script_name = _make_test_script(pkg_dir, 'script')
launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg.script')
- self._check_script(launch_name, script_name, script_name, script_dir, 'test_pkg')
+ self._check_script(launch_name, script_name, script_name,
+ script_dir, 'test_pkg',
+ importlib.machinery.SourceFileLoader)
def test_module_in_package_in_zipfile(self):
with temp_dir() as script_dir:
zip_name, run_name = _make_test_zip_pkg(script_dir, 'test_zip', 'test_pkg', 'script')
launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg.script', zip_name)
- self._check_script(launch_name, run_name, run_name, zip_name, 'test_pkg')
+ self._check_script(launch_name, run_name, run_name,
+ zip_name, 'test_pkg', zipimport.zipimporter)
def test_module_in_subpackage_in_zipfile(self):
with temp_dir() as script_dir:
zip_name, run_name = _make_test_zip_pkg(script_dir, 'test_zip', 'test_pkg', 'script', depth=2)
launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg.test_pkg.script', zip_name)
- self._check_script(launch_name, run_name, run_name, zip_name, 'test_pkg.test_pkg')
+ self._check_script(launch_name, run_name, run_name,
+ zip_name, 'test_pkg.test_pkg',
+ zipimport.zipimporter)
def test_package(self):
with temp_dir() as script_dir:
@@ -202,7 +249,8 @@ class CmdLineTest(unittest.TestCase):
script_name = _make_test_script(pkg_dir, '__main__')
launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg')
self._check_script(launch_name, script_name,
- script_name, script_dir, 'test_pkg')
+ script_name, script_dir, 'test_pkg',
+ importlib.machinery.SourceFileLoader)
def test_package_compiled(self):
with temp_dir() as script_dir:
@@ -214,7 +262,8 @@ class CmdLineTest(unittest.TestCase):
pyc_file = support.make_legacy_pyc(script_name)
launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg')
self._check_script(launch_name, pyc_file,
- pyc_file, script_dir, 'test_pkg')
+ pyc_file, script_dir, 'test_pkg',
+ importlib.machinery.SourcelessFileLoader)
def test_package_error(self):
with temp_dir() as script_dir:
@@ -251,7 +300,8 @@ class CmdLineTest(unittest.TestCase):
expected = "init_argv0==%r" % '-m'
self.assertIn(expected.encode('utf-8'), out)
self._check_output(script_name, rc, out,
- script_name, script_name, '', 'test_pkg')
+ script_name, script_name, '', 'test_pkg',
+ importlib.machinery.SourceFileLoader)
def test_issue8202_dash_c_file_ignored(self):
# Make sure a "-c" file in the current directory
@@ -277,7 +327,8 @@ class CmdLineTest(unittest.TestCase):
f.write("data")
rc, out, err = assert_python_ok('-m', 'other', *example_args)
self._check_output(script_name, rc, out,
- script_name, script_name, '', '')
+ script_name, script_name, '', '',
+ importlib.machinery.SourceFileLoader)
def test_dash_m_error_code_is_one(self):
# If a module is invoked with the -m command line flag
@@ -294,6 +345,24 @@ class CmdLineTest(unittest.TestCase):
print(out)
self.assertEqual(rc, 1)
+ def test_pep_409_verbiage(self):
+ # Make sure PEP 409 syntax properly suppresses
+ # the context of an exception
+ script = textwrap.dedent("""\
+ try:
+ raise ValueError
+ except:
+ raise NameError from None
+ """)
+ with temp_dir() as script_dir:
+ script_name = _make_test_script(script_dir, 'script', script)
+ exitcode, stdout, stderr = assert_python_failure(script_name)
+ text = stderr.decode('ascii').split('\n')
+ self.assertEqual(len(text), 4)
+ self.assertTrue(text[0].startswith('Traceback'))
+ self.assertTrue(text[1].startswith(' File '))
+ self.assertTrue(text[3].startswith('NameError'))
+
def test_main():
support.run_unittest(CmdLineTest)
support.reap_children()
diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py
index e1c7a78..3377a7b 100644
--- a/Lib/test/test_code.py
+++ b/Lib/test/test_code.py
@@ -16,7 +16,7 @@ cellvars: ('x',)
freevars: ()
nlocals: 2
flags: 3
-consts: ('None', '<code object g>')
+consts: ('None', '<code object g>', "'f.<locals>.g'")
>>> dump(f(4).__code__)
name: g
diff --git a/Lib/test/test_code_module.py b/Lib/test/test_code_module.py
new file mode 100644
index 0000000..adef170
--- /dev/null
+++ b/Lib/test/test_code_module.py
@@ -0,0 +1,72 @@
+"Test InteractiveConsole and InteractiveInterpreter from code module"
+import sys
+import unittest
+from contextlib import ExitStack
+from unittest import mock
+from test import support
+
+code = support.import_module('code')
+
+
+class TestInteractiveConsole(unittest.TestCase):
+
+ def setUp(self):
+ self.console = code.InteractiveConsole()
+ self.mock_sys()
+
+ def mock_sys(self):
+ "Mock system environment for InteractiveConsole"
+ # use exit stack to match patch context managers to addCleanup
+ stack = ExitStack()
+ self.addCleanup(stack.close)
+ self.infunc = stack.enter_context(mock.patch('code.input',
+ create=True))
+ self.stdout = stack.enter_context(mock.patch('code.sys.stdout'))
+ self.stderr = stack.enter_context(mock.patch('code.sys.stderr'))
+ prepatch = mock.patch('code.sys', wraps=code.sys, spec=code.sys)
+ self.sysmod = stack.enter_context(prepatch)
+ if sys.excepthook is sys.__excepthook__:
+ self.sysmod.excepthook = self.sysmod.__excepthook__
+
+ def test_ps1(self):
+ self.infunc.side_effect = EOFError('Finished')
+ self.console.interact()
+ self.assertEqual(self.sysmod.ps1, '>>> ')
+
+ def test_ps2(self):
+ self.infunc.side_effect = EOFError('Finished')
+ self.console.interact()
+ self.assertEqual(self.sysmod.ps2, '... ')
+
+ def test_console_stderr(self):
+ self.infunc.side_effect = ["'antioch'", "", EOFError('Finished')]
+ self.console.interact()
+ for call in list(self.stdout.method_calls):
+ if 'antioch' in ''.join(call[1]):
+ break
+ else:
+ raise AssertionError("no console stdout")
+
+ def test_syntax_error(self):
+ self.infunc.side_effect = ["undefined", EOFError('Finished')]
+ self.console.interact()
+ for call in self.stderr.method_calls:
+ if 'NameError:' in ''.join(call[1]):
+ break
+ else:
+ raise AssertionError("No syntax error from console")
+
+ def test_sysexcepthook(self):
+ self.infunc.side_effect = ["raise ValueError('')",
+ EOFError('Finished')]
+ hook = mock.Mock()
+ self.sysmod.excepthook = hook
+ self.console.interact()
+ self.assertTrue(hook.called)
+
+
+def test_main():
+ support.run_unittest(TestInteractiveConsole)
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index e656d2f..81bf80d 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -1,5 +1,18 @@
-import test.support, unittest
-import sys, codecs, html.entities, unicodedata
+import codecs
+import html.entities
+import sys
+import test.support
+import unicodedata
+import unittest
+import warnings
+
+try:
+ import ctypes
+except ImportError:
+ ctypes = None
+ SIZEOF_WCHAR_T = -1
+else:
+ SIZEOF_WCHAR_T = ctypes.sizeof(ctypes.c_wchar)
class PosReturn:
# this can be used for configurable callbacks
@@ -135,22 +148,14 @@ class CodecCallbackTest(unittest.TestCase):
def test_backslashescape(self):
# Does the same as the "unicode-escape" encoding, but with different
# base encodings.
- sin = "a\xac\u1234\u20ac\u8000"
- if sys.maxunicode > 0xffff:
- sin += chr(sys.maxunicode)
- sout = b"a\\xac\\u1234\\u20ac\\u8000"
- if sys.maxunicode > 0xffff:
- sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
+ sin = "a\xac\u1234\u20ac\u8000\U0010ffff"
+ sout = b"a\\xac\\u1234\\u20ac\\u8000\\U0010ffff"
self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
- sout = b"a\xac\\u1234\\u20ac\\u8000"
- if sys.maxunicode > 0xffff:
- sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
+ sout = b"a\xac\\u1234\\u20ac\\u8000\\U0010ffff"
self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
- sout = b"a\xac\\u1234\xa4\\u8000"
- if sys.maxunicode > 0xffff:
- sout += bytes("\\U%08x" % sys.maxunicode, "ascii")
+ sout = b"a\xac\\u1234\xa4\\u8000\\U0010ffff"
self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
def test_decoding_callbacks(self):
@@ -200,33 +205,37 @@ class CodecCallbackTest(unittest.TestCase):
self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
def test_decodeunicodeinternal(self):
- self.assertRaises(
- UnicodeDecodeError,
- b"\x00\x00\x00\x00\x00".decode,
- "unicode-internal",
- )
- if sys.maxunicode > 0xffff:
+ with test.support.check_warnings(('unicode_internal codec has been '
+ 'deprecated', DeprecationWarning)):
+ self.assertRaises(
+ UnicodeDecodeError,
+ b"\x00\x00\x00\x00\x00".decode,
+ "unicode-internal",
+ )
+ if SIZEOF_WCHAR_T == 4:
def handler_unicodeinternal(exc):
if not isinstance(exc, UnicodeDecodeError):
raise TypeError("don't know how to handle %r" % exc)
return ("\x01", 1)
- self.assertEqual(
- b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
- "\u0000"
- )
+ with test.support.check_warnings(('unicode_internal codec has been '
+ 'deprecated', DeprecationWarning)):
+ self.assertEqual(
+ b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
+ "\u0000"
+ )
- self.assertEqual(
- b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
- "\u0000\ufffd"
- )
+ self.assertEqual(
+ b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
+ "\u0000\ufffd"
+ )
- codecs.register_error("test.hui", handler_unicodeinternal)
+ codecs.register_error("test.hui", handler_unicodeinternal)
- self.assertEqual(
- b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
- "\u0000\u0001\u0000"
- )
+ self.assertEqual(
+ b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
+ "\u0000\u0001\u0000"
+ )
def test_callbacks(self):
def handler1(exc):
@@ -355,7 +364,7 @@ class CodecCallbackTest(unittest.TestCase):
["ascii", "\uffffx", 0, 1, "ouch"],
"'ascii' codec can't encode character '\\uffff' in position 0: ouch"
)
- if sys.maxunicode > 0xffff:
+ if SIZEOF_WCHAR_T == 4:
self.check_exceptionobjectargs(
UnicodeEncodeError,
["ascii", "\U00010000x", 0, 1, "ouch"],
@@ -390,7 +399,7 @@ class CodecCallbackTest(unittest.TestCase):
["g\uffffrk", 1, 2, "ouch"],
"can't translate character '\\uffff' in position 1: ouch"
)
- if sys.maxunicode > 0xffff:
+ if SIZEOF_WCHAR_T == 4:
self.check_exceptionobjectargs(
UnicodeTranslateError,
["g\U00010000rk", 1, 2, "ouch"],
@@ -577,31 +586,30 @@ class CodecCallbackTest(unittest.TestCase):
UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
("\\uffff", 1)
)
- # 1 on UCS-4 builds, 2 on UCS-2
- len_wide = len("\U00010000")
- self.assertEqual(
- codecs.backslashreplace_errors(
- UnicodeEncodeError("ascii", "\U00010000",
- 0, len_wide, "ouch")),
- ("\\U00010000", len_wide)
- )
- self.assertEqual(
- codecs.backslashreplace_errors(
- UnicodeEncodeError("ascii", "\U0010ffff",
- 0, len_wide, "ouch")),
- ("\\U0010ffff", len_wide)
- )
- # Lone surrogates (regardless of unicode width)
- self.assertEqual(
- codecs.backslashreplace_errors(
- UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
- ("\\ud800", 1)
- )
- self.assertEqual(
- codecs.backslashreplace_errors(
- UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
- ("\\udfff", 1)
- )
+ if SIZEOF_WCHAR_T > 0:
+ self.assertEqual(
+ codecs.backslashreplace_errors(
+ UnicodeEncodeError("ascii", "\U00010000",
+ 0, 1, "ouch")),
+ ("\\U00010000", 1)
+ )
+ self.assertEqual(
+ codecs.backslashreplace_errors(
+ UnicodeEncodeError("ascii", "\U0010ffff",
+ 0, 1, "ouch")),
+ ("\\U0010ffff", 1)
+ )
+ # Lone surrogates (regardless of unicode width)
+ self.assertEqual(
+ codecs.backslashreplace_errors(
+ UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")),
+ ("\\ud800", 1)
+ )
+ self.assertEqual(
+ codecs.backslashreplace_errors(
+ UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")),
+ ("\\udfff", 1)
+ )
def test_badhandlerresults(self):
results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
@@ -622,12 +630,14 @@ class CodecCallbackTest(unittest.TestCase):
("utf-7", b"+x-"),
("unicode-internal", b"\x00"),
):
- self.assertRaises(
- TypeError,
- bytes.decode,
- enc,
- "test.badhandler"
- )
+ with test.support.check_warnings():
+ # unicode-internal has been deprecated
+ self.assertRaises(
+ TypeError,
+ bytes.decode,
+ enc,
+ "test.badhandler"
+ )
def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
@@ -679,7 +689,7 @@ class CodecCallbackTest(unittest.TestCase):
# Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
# and inline implementations
v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
- if sys.maxunicode>=100000:
+ if SIZEOF_WCHAR_T == 4:
v += (100000, 500000, 1000000)
s = "".join([chr(x) for x in v])
codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
@@ -744,7 +754,7 @@ class CodecCallbackTest(unittest.TestCase):
raise ValueError
self.assertRaises(UnicodeError, codecs.charmap_decode, b"\xff", "strict", {0xff: None})
self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict", D())
- self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: 0x110000})
+ self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: sys.maxunicode+1})
def test_encodehelper(self):
# enhance coverage of:
@@ -843,8 +853,12 @@ class CodecCallbackTest(unittest.TestCase):
else:
raise TypeError("don't know how to handle %r" % exc)
codecs.register_error("test.replacing", replacing)
- for (encoding, data) in baddata:
- self.assertRaises(TypeError, data.decode, encoding, "test.replacing")
+
+ with test.support.check_warnings():
+ # unicode-internal has been deprecated
+ for (encoding, data) in baddata:
+ with self.assertRaises(TypeError):
+ data.decode(encoding, "test.replacing")
def mutating(exc):
if isinstance(exc, UnicodeDecodeError):
@@ -855,8 +869,11 @@ class CodecCallbackTest(unittest.TestCase):
codecs.register_error("test.mutating", mutating)
# If the decoder doesn't pick up the modified input the following
# will lead to an endless loop
- for (encoding, data) in baddata:
- self.assertRaises(TypeError, data.decode, encoding, "test.replacing")
+ with test.support.check_warnings():
+ # unicode-internal has been deprecated
+ for (encoding, data) in baddata:
+ with self.assertRaises(TypeError):
+ data.decode(encoding, "test.replacing")
def test_main():
test.support.run_unittest(CodecCallbackTest)
diff --git a/Lib/test/test_codecencodings_cn.py b/Lib/test/test_codecencodings_cn.py
index dca9f10..b08c5fc 100644
--- a/Lib/test/test_codecencodings_cn.py
+++ b/Lib/test/test_codecencodings_cn.py
@@ -5,54 +5,57 @@
#
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
-class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'gb2312'
- tstring = test_multibytecodec_support.load_teststring('gb2312')
+ tstring = multibytecodec_support.load_teststring('gb2312')
codectests = (
# invalid bytes
(b"abc\x81\x81\xc1\xc4", "strict", None),
(b"abc\xc8", "strict", None),
- (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
- (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+ (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
+ (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
(b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
(b"\xc1\x64", "strict", None),
)
-class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'gbk'
- tstring = test_multibytecodec_support.load_teststring('gbk')
+ tstring = multibytecodec_support.load_teststring('gbk')
codectests = (
# invalid bytes
(b"abc\x80\x80\xc1\xc4", "strict", None),
(b"abc\xc8", "strict", None),
- (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
- (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+ (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
+ (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
(b"\x83\x34\x83\x31", "strict", None),
("\u30fb", "strict", None),
)
-class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'gb18030'
- tstring = test_multibytecodec_support.load_teststring('gb18030')
+ tstring = multibytecodec_support.load_teststring('gb18030')
codectests = (
# invalid bytes
(b"abc\x80\x80\xc1\xc4", "strict", None),
(b"abc\xc8", "strict", None),
- (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
- (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+ (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
+ (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
- (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
+ (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
("\u30fb", "strict", b"\x819\xa79"),
+ (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
+ (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
+ (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
)
has_iso10646 = True
-class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'hz'
- tstring = test_multibytecodec_support.load_teststring('hz')
+ tstring = multibytecodec_support.load_teststring('hz')
codectests = (
# test '~\n' (3 lines)
(b'This sentence is in ASCII.\n'
@@ -74,9 +77,11 @@ class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
'Bye.\n'),
# invalid bytes
- (b'ab~cd', 'replace', 'ab\uFFFDd'),
+ (b'ab~cd', 'replace', 'ab\uFFFDcd'),
(b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
(b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
+ (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
+ (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
)
def test_main():
diff --git a/Lib/test/test_codecencodings_hk.py b/Lib/test/test_codecencodings_hk.py
index ccdc0b4..31363f4 100644
--- a/Lib/test/test_codecencodings_hk.py
+++ b/Lib/test/test_codecencodings_hk.py
@@ -5,18 +5,18 @@
#
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
-class Test_Big5HKSCS(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_Big5HKSCS(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'big5hkscs'
- tstring = test_multibytecodec_support.load_teststring('big5hkscs')
+ tstring = multibytecodec_support.load_teststring('big5hkscs')
codectests = (
# invalid bytes
(b"abc\x80\x80\xc1\xc4", "strict", None),
(b"abc\xc8", "strict", None),
- (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
- (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
+ (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"),
+ (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"),
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
)
diff --git a/Lib/test/test_codecencodings_iso2022.py b/Lib/test/test_codecencodings_iso2022.py
index 8c6e8a5..e4c1839 100644
--- a/Lib/test/test_codecencodings_iso2022.py
+++ b/Lib/test/test_codecencodings_iso2022.py
@@ -3,7 +3,7 @@
# Codec encoding tests for ISO 2022 encodings.
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
COMMON_CODEC_TESTS = (
@@ -13,23 +13,23 @@ COMMON_CODEC_TESTS = (
(b'ab\x1B$def', 'replace', 'ab\uFFFD'),
)
-class Test_ISO2022_JP(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_ISO2022_JP(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'iso2022_jp'
- tstring = test_multibytecodec_support.load_teststring('iso2022_jp')
+ tstring = multibytecodec_support.load_teststring('iso2022_jp')
codectests = COMMON_CODEC_TESTS + (
(b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
)
-class Test_ISO2022_JP2(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'iso2022_jp_2'
- tstring = test_multibytecodec_support.load_teststring('iso2022_jp')
+ tstring = multibytecodec_support.load_teststring('iso2022_jp')
codectests = COMMON_CODEC_TESTS + (
(b'ab\x1BNdef', 'replace', 'abdef'),
)
-class Test_ISO2022_KR(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'iso2022_kr'
- tstring = test_multibytecodec_support.load_teststring('iso2022_kr')
+ tstring = multibytecodec_support.load_teststring('iso2022_kr')
codectests = COMMON_CODEC_TESTS + (
(b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
)
diff --git a/Lib/test/test_codecencodings_jp.py b/Lib/test/test_codecencodings_jp.py
index f56a373..30c9e19 100644
--- a/Lib/test/test_codecencodings_jp.py
+++ b/Lib/test/test_codecencodings_jp.py
@@ -5,60 +5,67 @@
#
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
-class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'cp932'
- tstring = test_multibytecodec_support.load_teststring('shift_jis')
+ tstring = multibytecodec_support.load_teststring('shift_jis')
codectests = (
# invalid bytes
(b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
(b"abc\xf8", "strict", None),
- (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
- (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
- (b"abc\x81\x00\x82\x84", "ignore", "abc\uff44"),
+ (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
+ (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
+ (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"),
+ (b"ab\xEBxy", "replace", "ab\uFFFDxy"),
+ (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
+ (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),
# sjis vs cp932
(b"\\\x7e", "replace", "\\\x7e"),
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
)
-class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
+euc_commontests = (
+ # invalid bytes
+ (b"abc\x80\x80\xc1\xc4", "strict", None),
+ (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
+ (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
+ (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
+ (b"abc\xc8", "strict", None),
+ (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
+ (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
+ (b"\xc1\x64", "strict", None),
+ (b"\xa1\xc0", "strict", "\uff3c"),
+ (b"\xa1\xc0\\", "strict", "\uff3c\\"),
+ (b"\x8eXY", "replace", "\ufffdXY"),
+)
+
+class Test_EUC_JIS_2004(multibytecodec_support.TestBase,
unittest.TestCase):
- encoding = 'euc_jisx0213'
- tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
- codectests = (
- # invalid bytes
- (b"abc\x80\x80\xc1\xc4", "strict", None),
- (b"abc\xc8", "strict", None),
- (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
- (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
- (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
- (b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
- (b"\xc1\x64", "strict", None),
- (b"\xa1\xc0", "strict", "\uff3c"),
- )
+ encoding = 'euc_jis_2004'
+ tstring = multibytecodec_support.load_teststring('euc_jisx0213')
+ codectests = euc_commontests
xmlcharnametest = (
"\xab\u211c\xbb = \u2329\u1234\u232a",
b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
)
-eucjp_commontests = (
- (b"abc\x80\x80\xc1\xc4", "strict", None),
- (b"abc\xc8", "strict", None),
- (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
- (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
- (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
- (b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
- (b"\xc1\x64", "strict", None),
-)
+class Test_EUC_JISX0213(multibytecodec_support.TestBase,
+ unittest.TestCase):
+ encoding = 'euc_jisx0213'
+ tstring = multibytecodec_support.load_teststring('euc_jisx0213')
+ codectests = euc_commontests
+ xmlcharnametest = (
+ "\xab\u211c\xbb = \u2329\u1234\u232a",
+ b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
+ )
-class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
+class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase,
unittest.TestCase):
encoding = 'euc_jp'
- tstring = test_multibytecodec_support.load_teststring('euc_jp')
- codectests = eucjp_commontests + (
- (b"\xa1\xc0\\", "strict", "\uff3c\\"),
+ tstring = multibytecodec_support.load_teststring('euc_jp')
+ codectests = euc_commontests + (
("\xa5", "strict", b"\x5c"),
("\u203e", "strict", b"\x7e"),
)
@@ -66,29 +73,48 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
shiftjis_commonenctests = (
(b"abc\x80\x80\x82\x84", "strict", None),
(b"abc\xf8", "strict", None),
- (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
- (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
(b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
)
-class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'shift_jis'
- tstring = test_multibytecodec_support.load_teststring('shift_jis')
+ tstring = multibytecodec_support.load_teststring('shift_jis')
codectests = shiftjis_commonenctests + (
+ (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
+ (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
+
(b"\\\x7e", "strict", "\\\x7e"),
(b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
+ (b"abc\x81\x39", "replace", "abc\ufffd9"),
+ (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"),
+ (b"abc\xFF\x58", "replace", "abc\ufffdX"),
)
-class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
+ encoding = 'shift_jis_2004'
+ tstring = multibytecodec_support.load_teststring('shift_jis')
+ codectests = shiftjis_commonenctests + (
+ (b"\\\x7e", "strict", "\xa5\u203e"),
+ (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
+ (b"abc\xEA\xFC", "strict", "abc\u64bf"),
+ (b"\x81\x39xy", "replace", "\ufffd9xy"),
+ (b"\xFF\x58xy", "replace", "\ufffdXxy"),
+ (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
+ (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
+ (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
+ )
+ xmlcharnametest = (
+ "\xab\u211c\xbb = \u2329\u1234\u232a",
+ b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
+ )
+
+class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'shift_jisx0213'
- tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
- codectests = (
- # invalid bytes
- (b"abc\x80\x80\x82\x84", "strict", None),
- (b"abc\xf8", "strict", None),
- (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
- (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
- (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
+ tstring = multibytecodec_support.load_teststring('shift_jisx0213')
+ codectests = shiftjis_commonenctests + (
+ (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
+ (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
+
# sjis vs cp932
(b"\\\x7e", "replace", "\xa5\u203e"),
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
diff --git a/Lib/test/test_codecencodings_kr.py b/Lib/test/test_codecencodings_kr.py
index de4da7f..4dd6049 100644
--- a/Lib/test/test_codecencodings_kr.py
+++ b/Lib/test/test_codecencodings_kr.py
@@ -5,30 +5,30 @@
#
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
-class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_CP949(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'cp949'
- tstring = test_multibytecodec_support.load_teststring('cp949')
+ tstring = multibytecodec_support.load_teststring('cp949')
codectests = (
# invalid bytes
(b"abc\x80\x80\xc1\xc4", "strict", None),
(b"abc\xc8", "strict", None),
- (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
- (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
+ (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\uc894"),
+ (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
)
-class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_EUCKR(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'euc_kr'
- tstring = test_multibytecodec_support.load_teststring('euc_kr')
+ tstring = multibytecodec_support.load_teststring('euc_kr')
codectests = (
# invalid bytes
(b"abc\x80\x80\xc1\xc4", "strict", None),
(b"abc\xc8", "strict", None),
- (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
- (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
+ (b"abc\x80\x80\xc1\xc4", "replace", 'abc\ufffd\ufffd\uc894'),
+ (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
# composed make-up sequence errors
@@ -40,26 +40,31 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None),
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", "\uc4d4"),
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", "\uc4d4x"),
- (b"a\xa4\xd4\xa4\xb6\xa4", "replace", "a\ufffd"),
+ (b"a\xa4\xd4\xa4\xb6\xa4", "replace", 'a\ufffd'),
(b"\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None),
(b"\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None),
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None),
- (b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", "\ufffd"),
- (b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", "\ufffd"),
- (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", "\ufffd"),
+ (b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", '\ufffd\u6e21\ufffd\u3160\ufffd'),
+ (b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", '\ufffd\u6e21\ub544\ufffd\ufffd'),
+ (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", '\ufffd\u6e21\ub544\u572d\ufffd'),
+ (b"\xa4\xd4\xff\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "replace", '\ufffd\ufffd\ufffd\uc4d4'),
(b"\xc1\xc4", "strict", "\uc894"),
)
-class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_JOHAB(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'johab'
- tstring = test_multibytecodec_support.load_teststring('johab')
+ tstring = multibytecodec_support.load_teststring('johab')
codectests = (
# invalid bytes
(b"abc\x80\x80\xc1\xc4", "strict", None),
(b"abc\xc8", "strict", None),
- (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
- (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
+ (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\ucd27"),
+ (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\ucd27\ufffd"),
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),
+ (b"\xD8abc", "replace", "\uFFFDabc"),
+ (b"\xD8\xFFabc", "replace", "\uFFFD\uFFFDabc"),
+ (b"\x84bxy", "replace", "\uFFFDbxy"),
+ (b"\x8CBxy", "replace", "\uFFFDBxy"),
)
def test_main():
diff --git a/Lib/test/test_codecencodings_tw.py b/Lib/test/test_codecencodings_tw.py
index 12d3c9f..96245b7 100644
--- a/Lib/test/test_codecencodings_tw.py
+++ b/Lib/test/test_codecencodings_tw.py
@@ -5,18 +5,18 @@
#
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
-class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
+class Test_Big5(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'big5'
- tstring = test_multibytecodec_support.load_teststring('big5')
+ tstring = multibytecodec_support.load_teststring('big5')
codectests = (
# invalid bytes
(b"abc\x80\x80\xc1\xc4", "strict", None),
(b"abc\xc8", "strict", None),
- (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
- (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
+ (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"),
+ (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"),
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
)
diff --git a/Lib/test/test_codecmaps_cn.py b/Lib/test/test_codecmaps_cn.py
index 063919d..1a761cf 100644
--- a/Lib/test/test_codecmaps_cn.py
+++ b/Lib/test/test_codecmaps_cn.py
@@ -5,21 +5,21 @@
#
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
-class TestGB2312Map(test_multibytecodec_support.TestBase_Mapping,
+class TestGB2312Map(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'gb2312'
mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-CN.TXT'
-class TestGBKMap(test_multibytecodec_support.TestBase_Mapping,
+class TestGBKMap(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'gbk'
mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/' \
'MICSFT/WINDOWS/CP936.TXT'
-class TestGB18030Map(test_multibytecodec_support.TestBase_Mapping,
+class TestGB18030Map(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'gb18030'
mapfileurl = 'http://source.icu-project.org/repos/icu/data/' \
diff --git a/Lib/test/test_codecmaps_hk.py b/Lib/test/test_codecmaps_hk.py
index bbe1f2f..5f4e7c7 100644
--- a/Lib/test/test_codecmaps_hk.py
+++ b/Lib/test/test_codecmaps_hk.py
@@ -5,10 +5,10 @@
#
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
-class TestBig5HKSCSMap(test_multibytecodec_support.TestBase_Mapping,
+class TestBig5HKSCSMap(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'big5hkscs'
mapfileurl = 'http://people.freebsd.org/~perky/i18n/BIG5HKSCS-2004.TXT'
diff --git a/Lib/test/test_codecmaps_jp.py b/Lib/test/test_codecmaps_jp.py
index 652bd81..1fdbf63 100644
--- a/Lib/test/test_codecmaps_jp.py
+++ b/Lib/test/test_codecmaps_jp.py
@@ -5,10 +5,10 @@
#
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
-class TestCP932Map(test_multibytecodec_support.TestBase_Mapping,
+class TestCP932Map(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'cp932'
mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/' \
@@ -24,14 +24,14 @@ class TestCP932Map(test_multibytecodec_support.TestBase_Mapping,
supmaps.append((bytes([i]), chr(i+0xfec0)))
-class TestEUCJPCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
+class TestEUCJPCOMPATMap(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'euc_jp'
mapfilename = 'EUC-JP.TXT'
mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-JP.TXT'
-class TestSJISCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
+class TestSJISCOMPATMap(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'shift_jis'
mapfilename = 'SHIFTJIS.TXT'
@@ -46,14 +46,14 @@ class TestSJISCOMPATMap(test_multibytecodec_support.TestBase_Mapping,
(b'\x81_', '\\'),
]
-class TestEUCJISX0213Map(test_multibytecodec_support.TestBase_Mapping,
+class TestEUCJISX0213Map(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'euc_jisx0213'
mapfilename = 'EUC-JISX0213.TXT'
mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-JISX0213.TXT'
-class TestSJISX0213Map(test_multibytecodec_support.TestBase_Mapping,
+class TestSJISX0213Map(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'shift_jisx0213'
mapfilename = 'SHIFT_JISX0213.TXT'
diff --git a/Lib/test/test_codecmaps_kr.py b/Lib/test/test_codecmaps_kr.py
index d909c8b..0356402 100644
--- a/Lib/test/test_codecmaps_kr.py
+++ b/Lib/test/test_codecmaps_kr.py
@@ -5,17 +5,17 @@
#
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
-class TestCP949Map(test_multibytecodec_support.TestBase_Mapping,
+class TestCP949Map(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'cp949'
mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT' \
'/WINDOWS/CP949.TXT'
-class TestEUCKRMap(test_multibytecodec_support.TestBase_Mapping,
+class TestEUCKRMap(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'euc_kr'
mapfileurl = 'http://people.freebsd.org/~perky/i18n/EUC-KR.TXT'
@@ -25,7 +25,7 @@ class TestEUCKRMap(test_multibytecodec_support.TestBase_Mapping,
pass_dectest = [(b'\xa4\xd4', '\u3164')]
-class TestJOHABMap(test_multibytecodec_support.TestBase_Mapping,
+class TestJOHABMap(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'johab'
mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/' \
diff --git a/Lib/test/test_codecmaps_tw.py b/Lib/test/test_codecmaps_tw.py
index 6db5091..44467e3 100644
--- a/Lib/test/test_codecmaps_tw.py
+++ b/Lib/test/test_codecmaps_tw.py
@@ -5,16 +5,16 @@
#
from test import support
-from test import test_multibytecodec_support
+from test import multibytecodec_support
import unittest
-class TestBIG5Map(test_multibytecodec_support.TestBase_Mapping,
+class TestBIG5Map(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'big5'
mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/' \
'EASTASIA/OTHER/BIG5.TXT'
-class TestCP950Map(test_multibytecodec_support.TestBase_Mapping,
+class TestCP950Map(multibytecodec_support.TestBase_Mapping,
unittest.TestCase):
encoding = 'cp950'
mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/' \
@@ -23,6 +23,9 @@ class TestCP950Map(test_multibytecodec_support.TestBase_Mapping,
(b'\xa2\xcc', '\u5341'),
(b'\xa2\xce', '\u5345'),
]
+ codectests = (
+ (b"\xFFxy", "replace", "\ufffdxy"),
+ )
def test_main():
support.run_unittest(__name__)
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index f342d88..4e808ec 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1,8 +1,25 @@
-from test import support
-import unittest
+import _testcapi
import codecs
+import io
import locale
-import sys, _testcapi, io
+import sys
+import unittest
+import warnings
+
+from test import support
+
+if sys.platform == 'win32':
+ VISTA_OR_LATER = (sys.getwindowsversion().major >= 6)
+else:
+ VISTA_OR_LATER = False
+
+try:
+ import ctypes
+except ImportError:
+ ctypes = None
+ SIZEOF_WCHAR_T = -1
+else:
+ SIZEOF_WCHAR_T = ctypes.sizeof(ctypes.c_wchar)
class Queue(object):
"""
@@ -644,8 +661,113 @@ class UTF8Test(ReadTest):
b"abc\xed\xa0\x80def")
self.assertEqual(b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass"),
"abc\ud800def")
+ self.assertEqual("\U00010fff\uD800".encode("utf-8", "surrogatepass"),
+ b"\xf0\x90\xbf\xbf\xed\xa0\x80")
+ self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("utf-8", "surrogatepass"),
+ "\U00010fff\uD800")
self.assertTrue(codecs.lookup_error("surrogatepass"))
+@unittest.skipUnless(sys.platform == 'win32',
+ 'cp65001 is a Windows-only codec')
+class CP65001Test(ReadTest):
+ encoding = "cp65001"
+
+ def test_encode(self):
+ tests = [
+ ('abc', 'strict', b'abc'),
+ ('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'),
+ ('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),
+ ]
+ if VISTA_OR_LATER:
+ tests.extend((
+ ('\udc80', 'strict', None),
+ ('\udc80', 'ignore', b''),
+ ('\udc80', 'replace', b'?'),
+ ('\udc80', 'backslashreplace', b'\\udc80'),
+ ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
+ ))
+ else:
+ tests.append(('\udc80', 'strict', b'\xed\xb2\x80'))
+ for text, errors, expected in tests:
+ if expected is not None:
+ try:
+ encoded = text.encode('cp65001', errors)
+ except UnicodeEncodeError as err:
+ self.fail('Unable to encode %a to cp65001 with '
+ 'errors=%r: %s' % (text, errors, err))
+ self.assertEqual(encoded, expected,
+ '%a.encode("cp65001", %r)=%a != %a'
+ % (text, errors, encoded, expected))
+ else:
+ self.assertRaises(UnicodeEncodeError,
+ text.encode, "cp65001", errors)
+
+ def test_decode(self):
+ tests = [
+ (b'abc', 'strict', 'abc'),
+ (b'\xc3\xa9\xe2\x82\xac', 'strict', '\xe9\u20ac'),
+ (b'\xf4\x8f\xbf\xbf', 'strict', '\U0010ffff'),
+ (b'\xef\xbf\xbd', 'strict', '\ufffd'),
+ (b'[\xc3\xa9]', 'strict', '[\xe9]'),
+ # invalid bytes
+ (b'[\xff]', 'strict', None),
+ (b'[\xff]', 'ignore', '[]'),
+ (b'[\xff]', 'replace', '[\ufffd]'),
+ (b'[\xff]', 'surrogateescape', '[\udcff]'),
+ ]
+ if VISTA_OR_LATER:
+ tests.extend((
+ (b'[\xed\xb2\x80]', 'strict', None),
+ (b'[\xed\xb2\x80]', 'ignore', '[]'),
+ (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
+ ))
+ else:
+ tests.extend((
+ (b'[\xed\xb2\x80]', 'strict', '[\udc80]'),
+ ))
+ for raw, errors, expected in tests:
+ if expected is not None:
+ try:
+ decoded = raw.decode('cp65001', errors)
+ except UnicodeDecodeError as err:
+ self.fail('Unable to decode %a from cp65001 with '
+ 'errors=%r: %s' % (raw, errors, err))
+ self.assertEqual(decoded, expected,
+ '%a.decode("cp65001", %r)=%a != %a'
+ % (raw, errors, decoded, expected))
+ else:
+ self.assertRaises(UnicodeDecodeError,
+ raw.decode, 'cp65001', errors)
+
+ @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
+ def test_lone_surrogates(self):
+ self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001")
+ self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001")
+ self.assertEqual("[\uDC80]".encode("cp65001", "backslashreplace"),
+ b'[\\udc80]')
+ self.assertEqual("[\uDC80]".encode("cp65001", "xmlcharrefreplace"),
+ b'[&#56448;]')
+ self.assertEqual("[\uDC80]".encode("cp65001", "surrogateescape"),
+ b'[\x80]')
+ self.assertEqual("[\uDC80]".encode("cp65001", "ignore"),
+ b'[]')
+ self.assertEqual("[\uDC80]".encode("cp65001", "replace"),
+ b'[?]')
+
+ @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
+ def test_surrogatepass_handler(self):
+ self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"),
+ b"abc\xed\xa0\x80def")
+ self.assertEqual(b"abc\xed\xa0\x80def".decode("cp65001", "surrogatepass"),
+ "abc\ud800def")
+ self.assertEqual("\U00010fff\uD800".encode("cp65001", "surrogatepass"),
+ b"\xf0\x90\xbf\xbf\xed\xa0\x80")
+ self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("cp65001", "surrogatepass"),
+ "\U00010fff\uD800")
+ self.assertTrue(codecs.lookup_error("surrogatepass"))
+
+
+
class UTF7Test(ReadTest):
encoding = "utf-7"
@@ -906,61 +1028,80 @@ class PunycodeTest(unittest.TestCase):
self.assertEqual(uni, puny.decode("punycode"))
class UnicodeInternalTest(unittest.TestCase):
+ @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
def test_bug1251300(self):
# Decoding with unicode_internal used to not correctly handle "code
# points" above 0x10ffff on UCS-4 builds.
- if sys.maxunicode > 0xffff:
- ok = [
- (b"\x00\x10\xff\xff", "\U0010ffff"),
- (b"\x00\x00\x01\x01", "\U00000101"),
- (b"", ""),
- ]
- not_ok = [
- b"\x7f\xff\xff\xff",
- b"\x80\x00\x00\x00",
- b"\x81\x00\x00\x00",
- b"\x00",
- b"\x00\x00\x00\x00\x00",
- ]
- for internal, uni in ok:
- if sys.byteorder == "little":
- internal = bytes(reversed(internal))
+ ok = [
+ (b"\x00\x10\xff\xff", "\U0010ffff"),
+ (b"\x00\x00\x01\x01", "\U00000101"),
+ (b"", ""),
+ ]
+ not_ok = [
+ b"\x7f\xff\xff\xff",
+ b"\x80\x00\x00\x00",
+ b"\x81\x00\x00\x00",
+ b"\x00",
+ b"\x00\x00\x00\x00\x00",
+ ]
+ for internal, uni in ok:
+ if sys.byteorder == "little":
+ internal = bytes(reversed(internal))
+ with support.check_warnings():
self.assertEqual(uni, internal.decode("unicode_internal"))
- for internal in not_ok:
- if sys.byteorder == "little":
- internal = bytes(reversed(internal))
+ for internal in not_ok:
+ if sys.byteorder == "little":
+ internal = bytes(reversed(internal))
+ with support.check_warnings(('unicode_internal codec has been '
+ 'deprecated', DeprecationWarning)):
self.assertRaises(UnicodeDecodeError, internal.decode,
- "unicode_internal")
-
+ "unicode_internal")
+ if sys.byteorder == "little":
+ invalid = b"\x00\x00\x11\x00"
+ else:
+ invalid = b"\x00\x11\x00\x00"
+ with support.check_warnings():
+ self.assertRaises(UnicodeDecodeError,
+ invalid.decode, "unicode_internal")
+ with support.check_warnings():
+ self.assertEqual(invalid.decode("unicode_internal", "replace"),
+ '\ufffd')
+
+ @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
def test_decode_error_attributes(self):
- if sys.maxunicode > 0xffff:
- try:
+ try:
+ with support.check_warnings(('unicode_internal codec has been '
+ 'deprecated', DeprecationWarning)):
b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
- except UnicodeDecodeError as ex:
- self.assertEqual("unicode_internal", ex.encoding)
- self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
- self.assertEqual(4, ex.start)
- self.assertEqual(8, ex.end)
- else:
- self.fail()
+ except UnicodeDecodeError as ex:
+ self.assertEqual("unicode_internal", ex.encoding)
+ self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
+ self.assertEqual(4, ex.start)
+ self.assertEqual(8, ex.end)
+ else:
+ self.fail()
+ @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
def test_decode_callback(self):
- if sys.maxunicode > 0xffff:
- codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
- decoder = codecs.getdecoder("unicode_internal")
+ codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
+ decoder = codecs.getdecoder("unicode_internal")
+ with support.check_warnings(('unicode_internal codec has been '
+ 'deprecated', DeprecationWarning)):
ab = "ab".encode("unicode_internal").decode()
ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"ascii"),
"UnicodeInternalTest")
- self.assertEqual(("ab", 12), ignored)
+ self.assertEqual(("ab", 12), ignored)
def test_encode_length(self):
- # Issue 3739
- encoder = codecs.getencoder("unicode_internal")
- self.assertEqual(encoder("a")[1], 1)
- self.assertEqual(encoder("\xe9\u0142")[1], 2)
+ with support.check_warnings(('unicode_internal codec has been '
+ 'deprecated', DeprecationWarning)):
+ # Issue 3739
+ encoder = codecs.getencoder("unicode_internal")
+ self.assertEqual(encoder("a")[1], 1)
+ self.assertEqual(encoder("\xe9\u0142")[1], 2)
- self.assertEqual(codecs.escape_encode(br'\x00')[1], 4)
+ self.assertEqual(codecs.escape_encode(br'\x00')[1], 4)
# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
nameprep_tests = [
@@ -1284,7 +1425,7 @@ class EncodedFileTest(unittest.TestCase):
self.assertEqual(ef.read(), b'\\\xd5\n\x00\x00\xae')
f = io.BytesIO()
- ef = codecs.EncodedFile(f, 'utf-8', 'latin1')
+ ef = codecs.EncodedFile(f, 'utf-8', 'latin-1')
ef.write(b'\xc3\xbc')
self.assertEqual(f.getvalue(), b'\xfc')
@@ -1416,10 +1557,13 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
elif encoding == "latin_1":
name = "latin_1"
self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
- (b, size) = codecs.getencoder(encoding)(s)
- self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
- (chars, size) = codecs.getdecoder(encoding)(b)
- self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
+
+ with support.check_warnings():
+ # unicode-internal has been deprecated
+ (b, size) = codecs.getencoder(encoding)(s)
+ self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
+ (chars, size) = codecs.getdecoder(encoding)(b)
+ self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
if encoding not in broken_unicode_with_streams:
# check stream reader/writer
@@ -1523,7 +1667,9 @@ class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
def test_bad_encode_args(self):
for encoding in all_unicode_encodings:
encoder = codecs.getencoder(encoding)
- self.assertRaises(TypeError, encoder)
+ with support.check_warnings():
+ # unicode-internal has been deprecated
+ self.assertRaises(TypeError, encoder)
def test_encoding_map_type_initialized(self):
from encodings import cp1140
@@ -1546,6 +1692,11 @@ class CharmapTest(unittest.TestCase):
("abc", 3)
)
+ self.assertEqual(
+ codecs.charmap_decode(b"\x00\x01\x02", "strict", "\U0010FFFFbc"),
+ ("\U0010FFFFbc", 3)
+ )
+
self.assertRaises(UnicodeDecodeError,
codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab"
)
@@ -1654,9 +1805,15 @@ class CharmapTest(unittest.TestCase):
("\U0010FFFFbc", 3)
)
+ self.assertEqual(
+ codecs.charmap_decode(b"\x00\x01\x02", "strict",
+ {0: sys.maxunicode, 1: b, 2: c}),
+ (chr(sys.maxunicode) + "bc", 3)
+ )
+
self.assertRaises(TypeError,
codecs.charmap_decode, b"\x00\x01\x02", "strict",
- {0: 0x110000, 1: b, 2: c}
+ {0: sys.maxunicode + 1, 1: b, 2: c}
)
self.assertRaises(UnicodeDecodeError,
@@ -1720,6 +1877,12 @@ class TypesTest(unittest.TestCase):
self.assertEqual(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
self.assertEqual(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
+ self.assertRaises(UnicodeDecodeError, codecs.unicode_escape_decode, br"\U00110000")
+ self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
+
+ self.assertRaises(UnicodeDecodeError, codecs.raw_unicode_escape_decode, br"\U00110000")
+ self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
+
class SurrogateEscapeTest(unittest.TestCase):
def test_utf8(self):
@@ -1750,7 +1913,7 @@ class SurrogateEscapeTest(unittest.TestCase):
def test_latin1(self):
# Issue6373
- self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin1", "surrogateescape"),
+ self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"),
b"\xe4\xeb\xef\xf6\xfc")
@@ -1859,6 +2022,155 @@ class TransformCodecTest(unittest.TestCase):
self.assertEqual(sout, b"\x80")
+@unittest.skipUnless(sys.platform == 'win32',
+ 'code pages are specific to Windows')
+class CodePageTest(unittest.TestCase):
+ # CP_UTF8 is already tested by CP65001Test
+ CP_UTF8 = 65001
+
+ def test_invalid_code_page(self):
+ self.assertRaises(ValueError, codecs.code_page_encode, -1, 'a')
+ self.assertRaises(ValueError, codecs.code_page_decode, -1, b'a')
+ self.assertRaises(WindowsError, codecs.code_page_encode, 123, 'a')
+ self.assertRaises(WindowsError, codecs.code_page_decode, 123, b'a')
+
+ def test_code_page_name(self):
+ self.assertRaisesRegex(UnicodeEncodeError, 'cp932',
+ codecs.code_page_encode, 932, '\xff')
+ self.assertRaisesRegex(UnicodeDecodeError, 'cp932',
+ codecs.code_page_decode, 932, b'\x81\x00')
+ self.assertRaisesRegex(UnicodeDecodeError, 'CP_UTF8',
+ codecs.code_page_decode, self.CP_UTF8, b'\xff')
+
+ def check_decode(self, cp, tests):
+ for raw, errors, expected in tests:
+ if expected is not None:
+ try:
+ decoded = codecs.code_page_decode(cp, raw, errors)
+ except UnicodeDecodeError as err:
+ self.fail('Unable to decode %a from "cp%s" with '
+ 'errors=%r: %s' % (raw, cp, errors, err))
+ self.assertEqual(decoded[0], expected,
+ '%a.decode("cp%s", %r)=%a != %a'
+ % (raw, cp, errors, decoded[0], expected))
+ # assert 0 <= decoded[1] <= len(raw)
+ self.assertGreaterEqual(decoded[1], 0)
+ self.assertLessEqual(decoded[1], len(raw))
+ else:
+ self.assertRaises(UnicodeDecodeError,
+ codecs.code_page_decode, cp, raw, errors)
+
+ def check_encode(self, cp, tests):
+ for text, errors, expected in tests:
+ if expected is not None:
+ try:
+ encoded = codecs.code_page_encode(cp, text, errors)
+ except UnicodeEncodeError as err:
+ self.fail('Unable to encode %a to "cp%s" with '
+ 'errors=%r: %s' % (text, cp, errors, err))
+ self.assertEqual(encoded[0], expected,
+ '%a.encode("cp%s", %r)=%a != %a'
+ % (text, cp, errors, encoded[0], expected))
+ self.assertEqual(encoded[1], len(text))
+ else:
+ self.assertRaises(UnicodeEncodeError,
+ codecs.code_page_encode, cp, text, errors)
+
+ def test_cp932(self):
+ self.check_encode(932, (
+ ('abc', 'strict', b'abc'),
+ ('\uff44\u9a3e', 'strict', b'\x82\x84\xe9\x80'),
+ # test error handlers
+ ('\xff', 'strict', None),
+ ('[\xff]', 'ignore', b'[]'),
+ ('[\xff]', 'replace', b'[y]'),
+ ('[\u20ac]', 'replace', b'[?]'),
+ ('[\xff]', 'backslashreplace', b'[\\xff]'),
+ ('[\xff]', 'xmlcharrefreplace', b'[&#255;]'),
+ ))
+ self.check_decode(932, (
+ (b'abc', 'strict', 'abc'),
+ (b'\x82\x84\xe9\x80', 'strict', '\uff44\u9a3e'),
+ # invalid bytes
+ (b'[\xff]', 'strict', None),
+ (b'[\xff]', 'ignore', '[]'),
+ (b'[\xff]', 'replace', '[\ufffd]'),
+ (b'[\xff]', 'surrogateescape', '[\udcff]'),
+ (b'\x81\x00abc', 'strict', None),
+ (b'\x81\x00abc', 'ignore', '\x00abc'),
+ (b'\x81\x00abc', 'replace', '\ufffd\x00abc'),
+ ))
+
+ def test_cp1252(self):
+ self.check_encode(1252, (
+ ('abc', 'strict', b'abc'),
+ ('\xe9\u20ac', 'strict', b'\xe9\x80'),
+ ('\xff', 'strict', b'\xff'),
+ ('\u0141', 'strict', None),
+ ('\u0141', 'ignore', b''),
+ ('\u0141', 'replace', b'L'),
+ ))
+ self.check_decode(1252, (
+ (b'abc', 'strict', 'abc'),
+ (b'\xe9\x80', 'strict', '\xe9\u20ac'),
+ (b'\xff', 'strict', '\xff'),
+ ))
+
+ def test_cp_utf7(self):
+ cp = 65000
+ self.check_encode(cp, (
+ ('abc', 'strict', b'abc'),
+ ('\xe9\u20ac', 'strict', b'+AOkgrA-'),
+ ('\U0010ffff', 'strict', b'+2//f/w-'),
+ ('\udc80', 'strict', b'+3IA-'),
+ ('\ufffd', 'strict', b'+//0-'),
+ ))
+ self.check_decode(cp, (
+ (b'abc', 'strict', 'abc'),
+ (b'+AOkgrA-', 'strict', '\xe9\u20ac'),
+ (b'+2//f/w-', 'strict', '\U0010ffff'),
+ (b'+3IA-', 'strict', '\udc80'),
+ (b'+//0-', 'strict', '\ufffd'),
+ # invalid bytes
+ (b'[+/]', 'strict', '[]'),
+ (b'[\xff]', 'strict', '[\xff]'),
+ ))
+
+ def test_multibyte_encoding(self):
+ self.check_decode(932, (
+ (b'\x84\xe9\x80', 'ignore', '\u9a3e'),
+ (b'\x84\xe9\x80', 'replace', '\ufffd\u9a3e'),
+ ))
+ self.check_decode(self.CP_UTF8, (
+ (b'\xff\xf4\x8f\xbf\xbf', 'ignore', '\U0010ffff'),
+ (b'\xff\xf4\x8f\xbf\xbf', 'replace', '\ufffd\U0010ffff'),
+ ))
+ if VISTA_OR_LATER:
+ self.check_encode(self.CP_UTF8, (
+ ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'),
+ ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
+ ))
+
+ def test_incremental(self):
+ decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
+ self.assertEqual(decoded, ('', 0))
+
+ decoded = codecs.code_page_decode(932,
+ b'\xe9\x80\xe9', 'strict',
+ False)
+ self.assertEqual(decoded, ('\u9a3e', 2))
+
+ decoded = codecs.code_page_decode(932,
+ b'\xe9\x80\xe9\x80', 'strict',
+ False)
+ self.assertEqual(decoded, ('\u9a3e\u9a3e', 4))
+
+ decoded = codecs.code_page_decode(932,
+ b'abc', 'strict',
+ False)
+ self.assertEqual(decoded, ('abc', 3))
+
+
def test_main():
support.run_unittest(
UTF32Test,
@@ -1869,6 +2181,7 @@ def test_main():
UTF16BETest,
UTF8Test,
UTF8SigTest,
+ CP65001Test,
UTF7Test,
UTF16ExTest,
ReadBufferTest,
@@ -1887,6 +2200,7 @@ def test_main():
SurrogateEscapeTest,
BomTest,
TransformCodecTest,
+ CodePageTest,
)
diff --git a/Lib/test/test_coding.py b/Lib/test/test_coding.py
index f9db0b4..dfd5431 100644
--- a/Lib/test/test_coding.py
+++ b/Lib/test/test_coding.py
@@ -1,7 +1,6 @@
-
import test.support, unittest
from test.support import TESTFN, unlink, unload
-import os, sys
+import importlib, os, sys
class CodingTest(unittest.TestCase):
def test_bad_coding(self):
@@ -40,6 +39,7 @@ class CodingTest(unittest.TestCase):
f.write("'A very long string %s'\n" % ("X" * 1000))
f.close()
+ importlib.invalidate_caches()
__import__(TESTFN)
finally:
f.close()
diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py
index 8dc5559..88c3129 100644
--- a/Lib/test/test_collections.py
+++ b/Lib/test/test_collections.py
@@ -1,6 +1,7 @@
"""Unit tests for collections.py."""
import unittest, doctest, operator
+from test.support import TESTFN, forget, unlink
import inspect
from test import support
from collections import namedtuple, Counter, OrderedDict, _count_elements
@@ -10,21 +11,20 @@ from random import randrange, shuffle
import keyword
import re
import sys
-from collections import _ChainMap
-from collections import Hashable, Iterable, Iterator
-from collections import Sized, Container, Callable
-from collections import Set, MutableSet
-from collections import Mapping, MutableMapping, KeysView, ItemsView, UserDict
-from collections import Sequence, MutableSequence
-from collections import ByteString
+from collections import UserDict
+from collections import ChainMap
+from collections.abc import Hashable, Iterable, Iterator
+from collections.abc import Sized, Container, Callable
+from collections.abc import Set, MutableSet
+from collections.abc import Mapping, MutableMapping, KeysView, ItemsView
+from collections.abc import Sequence, MutableSequence
+from collections.abc import ByteString
################################################################################
-### _ChainMap (helper class for configparser)
+### ChainMap (helper class for configparser and the string module)
################################################################################
-ChainMap = _ChainMap # rename to keep test code in sync with 3.3 version
-
class TestChainMap(unittest.TestCase):
def test_basics(self):
@@ -128,6 +128,7 @@ class TestNamedTuple(unittest.TestCase):
self.assertEqual(Point.__module__, __name__)
self.assertEqual(Point.__getitem__, tuple.__getitem__)
self.assertEqual(Point._fields, ('x', 'y'))
+ self.assertIn('class Point(tuple)', Point._source)
self.assertRaises(ValueError, namedtuple, 'abc%', 'efg ghi') # type has non-alpha char
self.assertRaises(ValueError, namedtuple, 'class', 'efg ghi') # type has keyword
@@ -327,6 +328,17 @@ class TestNamedTuple(unittest.TestCase):
pass
self.assertEqual(repr(B(1)), 'B(x=1)')
+ def test_source(self):
+ # verify that _source can be run through exec()
+ tmp = namedtuple('NTColor', 'red green blue')
+ globals().pop('NTColor', None) # remove artifacts from other tests
+ exec(tmp._source, globals())
+ self.assertIn('NTColor', globals())
+ c = NTColor(10, 20, 30)
+ self.assertEqual((c.red, c.green, c.blue), (10, 20, 30))
+ self.assertEqual(NTColor._fields, ('red', 'green', 'blue'))
+ globals().pop('NTColor', None) # clean-up after this test
+
################################################################################
### Abstract Base Classes
@@ -729,6 +741,44 @@ class TestCollectionABCs(ABCTestCase):
self.validate_abstract_methods(MutableSequence, '__contains__', '__iter__',
'__len__', '__getitem__', '__setitem__', '__delitem__', 'insert')
+ def test_MutableSequence_mixins(self):
+ # Test the mixins of MutableSequence by creating a miminal concrete
+ # class inherited from it.
+ class MutableSequenceSubclass(MutableSequence):
+ def __init__(self):
+ self.lst = []
+
+ def __setitem__(self, index, value):
+ self.lst[index] = value
+
+ def __getitem__(self, index):
+ return self.lst[index]
+
+ def __len__(self):
+ return len(self.lst)
+
+ def __delitem__(self, index):
+ del self.lst[index]
+
+ def insert(self, index, value):
+ self.lst.insert(index, value)
+
+ mss = MutableSequenceSubclass()
+ mss.append(0)
+ mss.extend((1, 2, 3, 4))
+ self.assertEqual(len(mss), 5)
+ self.assertEqual(mss[3], 3)
+ mss.reverse()
+ self.assertEqual(mss[3], 1)
+ mss.pop()
+ self.assertEqual(len(mss), 4)
+ mss.remove(3)
+ self.assertEqual(len(mss), 3)
+ mss += (10, 20, 30)
+ self.assertEqual(len(mss), 6)
+ self.assertEqual(mss[-1], 30)
+ mss.clear()
+ self.assertEqual(len(mss), 0)
################################################################################
### Counter
@@ -882,6 +932,27 @@ class TestCounter(unittest.TestCase):
set_result = setop(set(p.elements()), set(q.elements()))
self.assertEqual(counter_result, dict.fromkeys(set_result, 1))
+ def test_inplace_operations(self):
+ elements = 'abcd'
+ for i in range(1000):
+ # test random pairs of multisets
+ p = Counter(dict((elem, randrange(-2,4)) for elem in elements))
+ p.update(e=1, f=-1, g=0)
+ q = Counter(dict((elem, randrange(-2,4)) for elem in elements))
+ q.update(h=1, i=-1, j=0)
+ for inplace_op, regular_op in [
+ (Counter.__iadd__, Counter.__add__),
+ (Counter.__isub__, Counter.__sub__),
+ (Counter.__ior__, Counter.__or__),
+ (Counter.__iand__, Counter.__and__),
+ ]:
+ c = p.copy()
+ c_id = id(c)
+ regular_result = regular_op(c, q)
+ inplace_result = inplace_op(c, q)
+ self.assertEqual(inplace_result, regular_result)
+ self.assertEqual(id(inplace_result), c_id)
+
def test_subtract(self):
c = Counter(a=-5, b=0, c=5, d=10, e=15,g=40)
c.subtract(a=1, b=2, c=-3, d=10, e=20, f=30, h=-50)
@@ -893,6 +964,11 @@ class TestCounter(unittest.TestCase):
c.subtract('aaaabbcce')
self.assertEqual(c, Counter(a=-1, b=0, c=-1, d=1, e=-1))
+ def test_unary(self):
+ c = Counter(a=-5, b=0, c=5, d=10, e=15,g=40)
+ self.assertEqual(dict(+c), dict(c=5, d=10, e=15, g=40))
+ self.assertEqual(dict(-c), dict(a=5))
+
def test_repr_nonsortable(self):
c = Counter(a=2, b=None)
r = repr(c)
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index 58ef297..72342f8 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -1,10 +1,17 @@
import unittest
import sys
import _ast
+import types
from test import support
class TestSpecifics(unittest.TestCase):
+ def compile_single(self, source):
+ compile(source, "<single>", "single")
+
+ def assertInvalidSingle(self, source):
+ self.assertRaises(SyntaxError, self.compile_single, source)
+
def test_no_ending_newline(self):
compile("hi", "<test>", "exec")
compile("hi\r", "<test>", "exec")
@@ -433,6 +440,39 @@ if 1:
ast.body = [_ast.BoolOp()]
self.assertRaises(TypeError, compile, ast, '<ast>', 'exec')
+ @support.cpython_only
+ def test_same_filename_used(self):
+ s = """def f(): pass\ndef g(): pass"""
+ c = compile(s, "myfile", "exec")
+ for obj in c.co_consts:
+ if isinstance(obj, types.CodeType):
+ self.assertIs(obj.co_filename, c.co_filename)
+
+ def test_single_statement(self):
+ self.compile_single("1 + 2")
+ self.compile_single("\n1 + 2")
+ self.compile_single("1 + 2\n")
+ self.compile_single("1 + 2\n\n")
+ self.compile_single("1 + 2\t\t\n")
+ self.compile_single("1 + 2\t\t\n ")
+ self.compile_single("1 + 2 # one plus two")
+ self.compile_single("1; 2")
+ self.compile_single("import sys; sys")
+ self.compile_single("def f():\n pass")
+ self.compile_single("while False:\n pass")
+ self.compile_single("if x:\n f(x)")
+ self.compile_single("if x:\n f(x)\nelse:\n g(x)")
+ self.compile_single("class T:\n pass")
+
+ def test_bad_single_statement(self):
+ self.assertInvalidSingle('1\n2')
+ self.assertInvalidSingle('def f(): pass')
+ self.assertInvalidSingle('a = 13\nb = 187')
+ self.assertInvalidSingle('del x\ndel y')
+ self.assertInvalidSingle('f()\ng()')
+ self.assertInvalidSingle('f()\n# blah\nblah()')
+ self.assertInvalidSingle('f()\nxy # blah\nblah()')
+ self.assertInvalidSingle('x = 5 # comment\nx = 6\n')
def test_main():
support.run_unittest(TestSpecifics)
diff --git a/Lib/test/test_concurrent_futures.py b/Lib/test/test_concurrent_futures.py
index 2afa938..6ae450d 100644
--- a/Lib/test/test_concurrent_futures.py
+++ b/Lib/test/test_concurrent_futures.py
@@ -19,7 +19,7 @@ import unittest
from concurrent import futures
from concurrent.futures._base import (
PENDING, RUNNING, CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED, Future)
-import concurrent.futures.process
+from concurrent.futures.process import BrokenProcessPool
def create_future(state=PENDING, exception=None, result=None):
@@ -34,7 +34,7 @@ PENDING_FUTURE = create_future(state=PENDING)
RUNNING_FUTURE = create_future(state=RUNNING)
CANCELLED_FUTURE = create_future(state=CANCELLED)
CANCELLED_AND_NOTIFIED_FUTURE = create_future(state=CANCELLED_AND_NOTIFIED)
-EXCEPTION_FUTURE = create_future(state=FINISHED, exception=IOError())
+EXCEPTION_FUTURE = create_future(state=FINISHED, exception=OSError())
SUCCESSFUL_FUTURE = create_future(state=FINISHED, result=42)
@@ -160,7 +160,7 @@ class ProcessPoolShutdownTest(ProcessPoolMixin, ExecutorShutdownTest):
processes = self.executor._processes
self.executor.shutdown()
- for p in processes:
+ for p in processes.values():
p.join()
def test_context_manager_shutdown(self):
@@ -169,7 +169,7 @@ class ProcessPoolShutdownTest(ProcessPoolMixin, ExecutorShutdownTest):
self.assertEqual(list(e.map(abs, range(-5, 5))),
[5, 4, 3, 2, 1, 0, 1, 2, 3, 4])
- for p in processes:
+ for p in processes.values():
p.join()
def test_del_shutdown(self):
@@ -180,7 +180,7 @@ class ProcessPoolShutdownTest(ProcessPoolMixin, ExecutorShutdownTest):
del executor
queue_management_thread.join()
- for p in processes:
+ for p in processes.values():
p.join()
@@ -268,14 +268,14 @@ class WaitTests(unittest.TestCase):
def test_timeout(self):
future1 = self.executor.submit(mul, 6, 7)
- future2 = self.executor.submit(time.sleep, 3)
+ future2 = self.executor.submit(time.sleep, 6)
finished, pending = futures.wait(
[CANCELLED_AND_NOTIFIED_FUTURE,
EXCEPTION_FUTURE,
SUCCESSFUL_FUTURE,
future1, future2],
- timeout=1.5,
+ timeout=5,
return_when=futures.ALL_COMPLETED)
self.assertEqual(set([CANCELLED_AND_NOTIFIED_FUTURE,
@@ -379,8 +379,8 @@ class ExecutorTest(unittest.TestCase):
results = []
try:
for i in self.executor.map(time.sleep,
- [0, 0, 3],
- timeout=1.5):
+ [0, 0, 6],
+ timeout=5):
results.append(i)
except futures.TimeoutError:
pass
@@ -389,13 +389,38 @@ class ExecutorTest(unittest.TestCase):
self.assertEqual([None, None], results)
+ def test_shutdown_race_issue12456(self):
+ # Issue #12456: race condition at shutdown where trying to post a
+ # sentinel in the call queue blocks (the queue is full while processes
+ # have exited).
+ self.executor.map(str, [2] * (self.worker_count + 1))
+ self.executor.shutdown()
+
class ThreadPoolExecutorTest(ThreadPoolMixin, ExecutorTest):
- pass
+ def test_map_submits_without_iteration(self):
+ """Tests verifying issue 11777."""
+ finished = []
+ def record_finished(n):
+ finished.append(n)
+
+ self.executor.map(record_finished, range(10))
+ self.executor.shutdown(wait=True)
+ self.assertCountEqual(finished, range(10))
class ProcessPoolExecutorTest(ProcessPoolMixin, ExecutorTest):
- pass
+ def test_killed_child(self):
+ # When a child process is abruptly terminated, the whole pool gets
+ # "broken".
+ futures = [self.executor.submit(time.sleep, 3)]
+ # Get one of the processes, and terminate (kill) it
+ p = next(iter(self.executor._processes.values()))
+ p.terminate()
+ for fut in futures:
+ self.assertRaises(BrokenProcessPool, fut.result)
+ # Submitting other jobs fails as well.
+ self.assertRaises(BrokenProcessPool, self.executor.submit, pow, 2, 8)
class FutureTests(unittest.TestCase):
@@ -498,7 +523,7 @@ class FutureTests(unittest.TestCase):
'<Future at 0x[0-9a-f]+ state=cancelled>')
self.assertRegex(
repr(EXCEPTION_FUTURE),
- '<Future at 0x[0-9a-f]+ state=finished raised IOError>')
+ '<Future at 0x[0-9a-f]+ state=finished raised OSError>')
self.assertRegex(
repr(SUCCESSFUL_FUTURE),
'<Future at 0x[0-9a-f]+ state=finished returned int>')
@@ -509,7 +534,7 @@ class FutureTests(unittest.TestCase):
f2 = create_future(state=RUNNING)
f3 = create_future(state=CANCELLED)
f4 = create_future(state=CANCELLED_AND_NOTIFIED)
- f5 = create_future(state=FINISHED, exception=IOError())
+ f5 = create_future(state=FINISHED, exception=OSError())
f6 = create_future(state=FINISHED, result=5)
self.assertTrue(f1.cancel())
@@ -563,7 +588,7 @@ class FutureTests(unittest.TestCase):
CANCELLED_FUTURE.result, timeout=0)
self.assertRaises(futures.CancelledError,
CANCELLED_AND_NOTIFIED_FUTURE.result, timeout=0)
- self.assertRaises(IOError, EXCEPTION_FUTURE.result, timeout=0)
+ self.assertRaises(OSError, EXCEPTION_FUTURE.result, timeout=0)
self.assertEqual(SUCCESSFUL_FUTURE.result(timeout=0), 42)
def test_result_with_success(self):
@@ -602,7 +627,7 @@ class FutureTests(unittest.TestCase):
self.assertRaises(futures.CancelledError,
CANCELLED_AND_NOTIFIED_FUTURE.exception, timeout=0)
self.assertTrue(isinstance(EXCEPTION_FUTURE.exception(timeout=0),
- IOError))
+ OSError))
self.assertEqual(SUCCESSFUL_FUTURE.exception(timeout=0), None)
def test_exception_with_success(self):
@@ -611,14 +636,14 @@ class FutureTests(unittest.TestCase):
time.sleep(1)
with f1._condition:
f1._state = FINISHED
- f1._exception = IOError()
+ f1._exception = OSError()
f1._condition.notify_all()
f1 = create_future(state=PENDING)
t = threading.Thread(target=notification)
t.start()
- self.assertTrue(isinstance(f1.exception(timeout=5), IOError))
+ self.assertTrue(isinstance(f1.exception(timeout=5), OSError))
@test.support.reap_threads
def test_main():
@@ -631,7 +656,8 @@ def test_main():
ThreadPoolAsCompletedTests,
FutureTests,
ProcessPoolShutdownTest,
- ThreadPoolShutdownTest)
+ ThreadPoolShutdownTest,
+ )
finally:
test.support.reap_children()
diff --git a/Lib/test/test_cfgparser.py b/Lib/test/test_configparser.py
index a6e9050..8d82182 100644
--- a/Lib/test/test_cfgparser.py
+++ b/Lib/test/test_configparser.py
@@ -1618,6 +1618,42 @@ class ExceptionPicklingTestCase(unittest.TestCase):
self.assertEqual(repr(e1), repr(e2))
+class InlineCommentStrippingTestCase(unittest.TestCase):
+ """Tests for issue #14590: ConfigParser doesn't strip inline comment when
+ delimiter occurs earlier without preceding space.."""
+
+ def test_stripping(self):
+ cfg = configparser.ConfigParser(inline_comment_prefixes=(';', '#',
+ '//'))
+ cfg.read_string("""
+ [section]
+ k1 = v1;still v1
+ k2 = v2 ;a comment
+ k3 = v3 ; also a comment
+ k4 = v4;still v4 ;a comment
+ k5 = v5;still v5 ; also a comment
+ k6 = v6;still v6; and still v6 ;a comment
+ k7 = v7;still v7; and still v7 ; also a comment
+
+ [multiprefix]
+ k1 = v1;still v1 #a comment ; yeah, pretty much
+ k2 = v2 // this already is a comment ; continued
+ k3 = v3;#//still v3# and still v3 ; a comment
+ """)
+ s = cfg['section']
+ self.assertEqual(s['k1'], 'v1;still v1')
+ self.assertEqual(s['k2'], 'v2')
+ self.assertEqual(s['k3'], 'v3')
+ self.assertEqual(s['k4'], 'v4;still v4')
+ self.assertEqual(s['k5'], 'v5;still v5')
+ self.assertEqual(s['k6'], 'v6;still v6; and still v6')
+ self.assertEqual(s['k7'], 'v7;still v7; and still v7')
+ s = cfg['multiprefix']
+ self.assertEqual(s['k1'], 'v1;still v1')
+ self.assertEqual(s['k2'], 'v2')
+ self.assertEqual(s['k3'], 'v3;#//still v3# and still v3')
+
+
def test_main():
support.run_unittest(
ConfigParserTestCase,
@@ -1640,4 +1676,5 @@ def test_main():
ReadFileTestCase,
CoverageOneHundredTestCase,
ExceptionPicklingTestCase,
+ InlineCommentStrippingTestCase,
)
diff --git a/Lib/test/test_contextlib.py b/Lib/test/test_contextlib.py
index 6e38305..e52ed91 100644
--- a/Lib/test/test_contextlib.py
+++ b/Lib/test/test_contextlib.py
@@ -370,6 +370,231 @@ class TestContextDecorator(unittest.TestCase):
self.assertEqual(state, [1, 'something else', 999])
+class TestExitStack(unittest.TestCase):
+
+ def test_no_resources(self):
+ with ExitStack():
+ pass
+
+ def test_callback(self):
+ expected = [
+ ((), {}),
+ ((1,), {}),
+ ((1,2), {}),
+ ((), dict(example=1)),
+ ((1,), dict(example=1)),
+ ((1,2), dict(example=1)),
+ ]
+ result = []
+ def _exit(*args, **kwds):
+ """Test metadata propagation"""
+ result.append((args, kwds))
+ with ExitStack() as stack:
+ for args, kwds in reversed(expected):
+ if args and kwds:
+ f = stack.callback(_exit, *args, **kwds)
+ elif args:
+ f = stack.callback(_exit, *args)
+ elif kwds:
+ f = stack.callback(_exit, **kwds)
+ else:
+ f = stack.callback(_exit)
+ self.assertIs(f, _exit)
+ for wrapper in stack._exit_callbacks:
+ self.assertIs(wrapper.__wrapped__, _exit)
+ self.assertNotEqual(wrapper.__name__, _exit.__name__)
+ self.assertIsNone(wrapper.__doc__, _exit.__doc__)
+ self.assertEqual(result, expected)
+
+ def test_push(self):
+ exc_raised = ZeroDivisionError
+ def _expect_exc(exc_type, exc, exc_tb):
+ self.assertIs(exc_type, exc_raised)
+ def _suppress_exc(*exc_details):
+ return True
+ def _expect_ok(exc_type, exc, exc_tb):
+ self.assertIsNone(exc_type)
+ self.assertIsNone(exc)
+ self.assertIsNone(exc_tb)
+ class ExitCM(object):
+ def __init__(self, check_exc):
+ self.check_exc = check_exc
+ def __enter__(self):
+ self.fail("Should not be called!")
+ def __exit__(self, *exc_details):
+ self.check_exc(*exc_details)
+ with ExitStack() as stack:
+ stack.push(_expect_ok)
+ self.assertIs(stack._exit_callbacks[-1], _expect_ok)
+ cm = ExitCM(_expect_ok)
+ stack.push(cm)
+ self.assertIs(stack._exit_callbacks[-1].__self__, cm)
+ stack.push(_suppress_exc)
+ self.assertIs(stack._exit_callbacks[-1], _suppress_exc)
+ cm = ExitCM(_expect_exc)
+ stack.push(cm)
+ self.assertIs(stack._exit_callbacks[-1].__self__, cm)
+ stack.push(_expect_exc)
+ self.assertIs(stack._exit_callbacks[-1], _expect_exc)
+ stack.push(_expect_exc)
+ self.assertIs(stack._exit_callbacks[-1], _expect_exc)
+ 1/0
+
+ def test_enter_context(self):
+ class TestCM(object):
+ def __enter__(self):
+ result.append(1)
+ def __exit__(self, *exc_details):
+ result.append(3)
+
+ result = []
+ cm = TestCM()
+ with ExitStack() as stack:
+ @stack.callback # Registered first => cleaned up last
+ def _exit():
+ result.append(4)
+ self.assertIsNotNone(_exit)
+ stack.enter_context(cm)
+ self.assertIs(stack._exit_callbacks[-1].__self__, cm)
+ result.append(2)
+ self.assertEqual(result, [1, 2, 3, 4])
+
+ def test_close(self):
+ result = []
+ with ExitStack() as stack:
+ @stack.callback
+ def _exit():
+ result.append(1)
+ self.assertIsNotNone(_exit)
+ stack.close()
+ result.append(2)
+ self.assertEqual(result, [1, 2])
+
+ def test_pop_all(self):
+ result = []
+ with ExitStack() as stack:
+ @stack.callback
+ def _exit():
+ result.append(3)
+ self.assertIsNotNone(_exit)
+ new_stack = stack.pop_all()
+ result.append(1)
+ result.append(2)
+ new_stack.close()
+ self.assertEqual(result, [1, 2, 3])
+
+ def test_exit_raise(self):
+ with self.assertRaises(ZeroDivisionError):
+ with ExitStack() as stack:
+ stack.push(lambda *exc: False)
+ 1/0
+
+ def test_exit_suppress(self):
+ with ExitStack() as stack:
+ stack.push(lambda *exc: True)
+ 1/0
+
+ def test_exit_exception_chaining_reference(self):
+ # Sanity check to make sure that ExitStack chaining matches
+ # actual nested with statements
+ class RaiseExc:
+ def __init__(self, exc):
+ self.exc = exc
+ def __enter__(self):
+ return self
+ def __exit__(self, *exc_details):
+ raise self.exc
+
+ class RaiseExcWithContext:
+ def __init__(self, outer, inner):
+ self.outer = outer
+ self.inner = inner
+ def __enter__(self):
+ return self
+ def __exit__(self, *exc_details):
+ try:
+ raise self.inner
+ except:
+ raise self.outer
+
+ class SuppressExc:
+ def __enter__(self):
+ return self
+ def __exit__(self, *exc_details):
+ type(self).saved_details = exc_details
+ return True
+
+ try:
+ with RaiseExc(IndexError):
+ with RaiseExcWithContext(KeyError, AttributeError):
+ with SuppressExc():
+ with RaiseExc(ValueError):
+ 1 / 0
+ except IndexError as exc:
+ self.assertIsInstance(exc.__context__, KeyError)
+ self.assertIsInstance(exc.__context__.__context__, AttributeError)
+ # Inner exceptions were suppressed
+ self.assertIsNone(exc.__context__.__context__.__context__)
+ else:
+ self.fail("Expected IndexError, but no exception was raised")
+ # Check the inner exceptions
+ inner_exc = SuppressExc.saved_details[1]
+ self.assertIsInstance(inner_exc, ValueError)
+ self.assertIsInstance(inner_exc.__context__, ZeroDivisionError)
+
+ def test_exit_exception_chaining(self):
+ # Ensure exception chaining matches the reference behaviour
+ def raise_exc(exc):
+ raise exc
+
+ saved_details = None
+ def suppress_exc(*exc_details):
+ nonlocal saved_details
+ saved_details = exc_details
+ return True
+
+ try:
+ with ExitStack() as stack:
+ stack.callback(raise_exc, IndexError)
+ stack.callback(raise_exc, KeyError)
+ stack.callback(raise_exc, AttributeError)
+ stack.push(suppress_exc)
+ stack.callback(raise_exc, ValueError)
+ 1 / 0
+ except IndexError as exc:
+ self.assertIsInstance(exc.__context__, KeyError)
+ self.assertIsInstance(exc.__context__.__context__, AttributeError)
+ # Inner exceptions were suppressed
+ self.assertIsNone(exc.__context__.__context__.__context__)
+ else:
+ self.fail("Expected IndexError, but no exception was raised")
+ # Check the inner exceptions
+ inner_exc = saved_details[1]
+ self.assertIsInstance(inner_exc, ValueError)
+ self.assertIsInstance(inner_exc.__context__, ZeroDivisionError)
+
+ def test_exit_exception_chaining_suppress(self):
+ with ExitStack() as stack:
+ stack.push(lambda *exc: True)
+ stack.push(lambda *exc: 1/0)
+ stack.push(lambda *exc: {}[1])
+
+ def test_excessive_nesting(self):
+ # The original implementation would die with RecursionError here
+ with ExitStack() as stack:
+ for i in range(10000):
+ stack.callback(int)
+
+ def test_instance_bypass(self):
+ class Example(object): pass
+ cm = Example()
+ cm.__exit__ = object()
+ stack = ExitStack()
+ self.assertRaises(AttributeError, stack.enter_context, cm)
+ stack.push(cm)
+ self.assertIs(stack._exit_callbacks[-1], cm)
+
+
# This is needed to make the test actually run under regrtest.py!
def test_main():
support.run_unittest(__name__)
diff --git a/Lib/test/test_copy.py b/Lib/test/test_copy.py
index a84c109..c4baae4 100644
--- a/Lib/test/test_copy.py
+++ b/Lib/test/test_copy.py
@@ -17,7 +17,7 @@ class TestCopy(unittest.TestCase):
# Attempt full line coverage of copy.py from top to bottom
def test_exceptions(self):
- self.assertTrue(copy.Error is copy.error)
+ self.assertIs(copy.Error, copy.error)
self.assertTrue(issubclass(copy.Error, Exception))
# The copy() method
@@ -54,20 +54,26 @@ class TestCopy(unittest.TestCase):
def test_copy_reduce_ex(self):
class C(object):
def __reduce_ex__(self, proto):
+ c.append(1)
return ""
def __reduce__(self):
- raise support.TestFailed("shouldn't call this")
+ self.fail("shouldn't call this")
+ c = []
x = C()
y = copy.copy(x)
- self.assertTrue(y is x)
+ self.assertIs(y, x)
+ self.assertEqual(c, [1])
def test_copy_reduce(self):
class C(object):
def __reduce__(self):
+ c.append(1)
return ""
+ c = []
x = C()
y = copy.copy(x)
- self.assertTrue(y is x)
+ self.assertIs(y, x)
+ self.assertEqual(c, [1])
def test_copy_cant(self):
class C(object):
@@ -91,7 +97,7 @@ class TestCopy(unittest.TestCase):
"hello", "hello\u1234", f.__code__,
NewStyle, range(10), Classic, max]
for x in tests:
- self.assertTrue(copy.copy(x) is x, repr(x))
+ self.assertIs(copy.copy(x), x)
def test_copy_list(self):
x = [1, 2, 3]
@@ -185,9 +191,9 @@ class TestCopy(unittest.TestCase):
x = [x, x]
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(y is not x)
- self.assertTrue(y[0] is not x[0])
- self.assertTrue(y[0] is y[1])
+ self.assertIsNot(y, x)
+ self.assertIsNot(y[0], x[0])
+ self.assertIs(y[0], y[1])
def test_deepcopy_issubclass(self):
# XXX Note: there's no way to test the TypeError coming out of
@@ -227,20 +233,26 @@ class TestCopy(unittest.TestCase):
def test_deepcopy_reduce_ex(self):
class C(object):
def __reduce_ex__(self, proto):
+ c.append(1)
return ""
def __reduce__(self):
- raise support.TestFailed("shouldn't call this")
+ self.fail("shouldn't call this")
+ c = []
x = C()
y = copy.deepcopy(x)
- self.assertTrue(y is x)
+ self.assertIs(y, x)
+ self.assertEqual(c, [1])
def test_deepcopy_reduce(self):
class C(object):
def __reduce__(self):
+ c.append(1)
return ""
+ c = []
x = C()
y = copy.deepcopy(x)
- self.assertTrue(y is x)
+ self.assertIs(y, x)
+ self.assertEqual(c, [1])
def test_deepcopy_cant(self):
class C(object):
@@ -264,14 +276,14 @@ class TestCopy(unittest.TestCase):
"hello", "hello\u1234", f.__code__,
NewStyle, range(10), Classic, max]
for x in tests:
- self.assertTrue(copy.deepcopy(x) is x, repr(x))
+ self.assertIs(copy.deepcopy(x), x)
def test_deepcopy_list(self):
x = [[1, 2], 3]
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(x is not y)
- self.assertTrue(x[0] is not y[0])
+ self.assertIsNot(x, y)
+ self.assertIsNot(x[0], y[0])
def test_deepcopy_reflexive_list(self):
x = []
@@ -279,16 +291,26 @@ class TestCopy(unittest.TestCase):
y = copy.deepcopy(x)
for op in comparisons:
self.assertRaises(RuntimeError, op, y, x)
- self.assertTrue(y is not x)
- self.assertTrue(y[0] is y)
+ self.assertIsNot(y, x)
+ self.assertIs(y[0], y)
self.assertEqual(len(y), 1)
+ def test_deepcopy_empty_tuple(self):
+ x = ()
+ y = copy.deepcopy(x)
+ self.assertIs(x, y)
+
def test_deepcopy_tuple(self):
x = ([1, 2], 3)
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(x is not y)
- self.assertTrue(x[0] is not y[0])
+ self.assertIsNot(x, y)
+ self.assertIsNot(x[0], y[0])
+
+ def test_deepcopy_tuple_of_immutables(self):
+ x = ((1, 2), 3)
+ y = copy.deepcopy(x)
+ self.assertIs(x, y)
def test_deepcopy_reflexive_tuple(self):
x = ([],)
@@ -296,16 +318,16 @@ class TestCopy(unittest.TestCase):
y = copy.deepcopy(x)
for op in comparisons:
self.assertRaises(RuntimeError, op, y, x)
- self.assertTrue(y is not x)
- self.assertTrue(y[0] is not x[0])
- self.assertTrue(y[0][0] is y)
+ self.assertIsNot(y, x)
+ self.assertIsNot(y[0], x[0])
+ self.assertIs(y[0][0], y)
def test_deepcopy_dict(self):
x = {"foo": [1, 2], "bar": 3}
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(x is not y)
- self.assertTrue(x["foo"] is not y["foo"])
+ self.assertIsNot(x, y)
+ self.assertIsNot(x["foo"], y["foo"])
def test_deepcopy_reflexive_dict(self):
x = {}
@@ -315,15 +337,30 @@ class TestCopy(unittest.TestCase):
self.assertRaises(TypeError, op, y, x)
for op in equality_comparisons:
self.assertRaises(RuntimeError, op, y, x)
- self.assertTrue(y is not x)
- self.assertTrue(y['foo'] is y)
+ self.assertIsNot(y, x)
+ self.assertIs(y['foo'], y)
self.assertEqual(len(y), 1)
def test_deepcopy_keepalive(self):
memo = {}
- x = 42
+ x = []
+ y = copy.deepcopy(x, memo)
+ self.assertIs(memo[id(memo)][0], x)
+
+ def test_deepcopy_dont_memo_immutable(self):
+ memo = {}
+ x = [1, 2, 3, 4]
y = copy.deepcopy(x, memo)
- self.assertTrue(memo[id(x)] is x)
+ self.assertEqual(y, x)
+ # There's the entry for the new list, and the keep alive.
+ self.assertEqual(len(memo), 2)
+
+ memo = {}
+ x = [(1, 2)]
+ y = copy.deepcopy(x, memo)
+ self.assertEqual(y, x)
+ # Tuples with immutable contents are immutable for deepcopy.
+ self.assertEqual(len(memo), 2)
def test_deepcopy_inst_vanilla(self):
class C:
@@ -334,7 +371,7 @@ class TestCopy(unittest.TestCase):
x = C([42])
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(y.foo is not x.foo)
+ self.assertIsNot(y.foo, x.foo)
def test_deepcopy_inst_deepcopy(self):
class C:
@@ -347,8 +384,8 @@ class TestCopy(unittest.TestCase):
x = C([42])
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(y is not x)
- self.assertTrue(y.foo is not x.foo)
+ self.assertIsNot(y, x)
+ self.assertIsNot(y.foo, x.foo)
def test_deepcopy_inst_getinitargs(self):
class C:
@@ -361,8 +398,8 @@ class TestCopy(unittest.TestCase):
x = C([42])
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(y is not x)
- self.assertTrue(y.foo is not x.foo)
+ self.assertIsNot(y, x)
+ self.assertIsNot(y.foo, x.foo)
def test_deepcopy_inst_getstate(self):
class C:
@@ -375,8 +412,8 @@ class TestCopy(unittest.TestCase):
x = C([42])
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(y is not x)
- self.assertTrue(y.foo is not x.foo)
+ self.assertIsNot(y, x)
+ self.assertIsNot(y.foo, x.foo)
def test_deepcopy_inst_setstate(self):
class C:
@@ -389,8 +426,8 @@ class TestCopy(unittest.TestCase):
x = C([42])
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(y is not x)
- self.assertTrue(y.foo is not x.foo)
+ self.assertIsNot(y, x)
+ self.assertIsNot(y.foo, x.foo)
def test_deepcopy_inst_getstate_setstate(self):
class C:
@@ -405,8 +442,8 @@ class TestCopy(unittest.TestCase):
x = C([42])
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(y is not x)
- self.assertTrue(y.foo is not x.foo)
+ self.assertIsNot(y, x)
+ self.assertIsNot(y.foo, x.foo)
def test_deepcopy_reflexive_inst(self):
class C:
@@ -414,8 +451,8 @@ class TestCopy(unittest.TestCase):
x = C()
x.foo = x
y = copy.deepcopy(x)
- self.assertTrue(y is not x)
- self.assertTrue(y.foo is y)
+ self.assertIsNot(y, x)
+ self.assertIs(y.foo, y)
# _reconstruct()
@@ -425,9 +462,9 @@ class TestCopy(unittest.TestCase):
return ""
x = C()
y = copy.copy(x)
- self.assertTrue(y is x)
+ self.assertIs(y, x)
y = copy.deepcopy(x)
- self.assertTrue(y is x)
+ self.assertIs(y, x)
def test_reconstruct_nostate(self):
class C(object):
@@ -436,9 +473,9 @@ class TestCopy(unittest.TestCase):
x = C()
x.foo = 42
y = copy.copy(x)
- self.assertTrue(y.__class__ is x.__class__)
+ self.assertIs(y.__class__, x.__class__)
y = copy.deepcopy(x)
- self.assertTrue(y.__class__ is x.__class__)
+ self.assertIs(y.__class__, x.__class__)
def test_reconstruct_state(self):
class C(object):
@@ -452,7 +489,7 @@ class TestCopy(unittest.TestCase):
self.assertEqual(y, x)
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(y.foo is not x.foo)
+ self.assertIsNot(y.foo, x.foo)
def test_reconstruct_state_setstate(self):
class C(object):
@@ -468,7 +505,7 @@ class TestCopy(unittest.TestCase):
self.assertEqual(y, x)
y = copy.deepcopy(x)
self.assertEqual(y, x)
- self.assertTrue(y.foo is not x.foo)
+ self.assertIsNot(y.foo, x.foo)
def test_reconstruct_reflexive(self):
class C(object):
@@ -476,8 +513,8 @@ class TestCopy(unittest.TestCase):
x = C()
x.foo = x
y = copy.deepcopy(x)
- self.assertTrue(y is not x)
- self.assertTrue(y.foo is y)
+ self.assertIsNot(y, x)
+ self.assertIs(y.foo, y)
# Additions for Python 2.3 and pickle protocol 2
@@ -491,12 +528,12 @@ class TestCopy(unittest.TestCase):
x = C([[1, 2], 3])
y = copy.copy(x)
self.assertEqual(x, y)
- self.assertTrue(x is not y)
- self.assertTrue(x[0] is y[0])
+ self.assertIsNot(x, y)
+ self.assertIs(x[0], y[0])
y = copy.deepcopy(x)
self.assertEqual(x, y)
- self.assertTrue(x is not y)
- self.assertTrue(x[0] is not y[0])
+ self.assertIsNot(x, y)
+ self.assertIsNot(x[0], y[0])
def test_reduce_5tuple(self):
class C(dict):
@@ -508,12 +545,12 @@ class TestCopy(unittest.TestCase):
x = C([("foo", [1, 2]), ("bar", 3)])
y = copy.copy(x)
self.assertEqual(x, y)
- self.assertTrue(x is not y)
- self.assertTrue(x["foo"] is y["foo"])
+ self.assertIsNot(x, y)
+ self.assertIs(x["foo"], y["foo"])
y = copy.deepcopy(x)
self.assertEqual(x, y)
- self.assertTrue(x is not y)
- self.assertTrue(x["foo"] is not y["foo"])
+ self.assertIsNot(x, y)
+ self.assertIsNot(x["foo"], y["foo"])
def test_copy_slots(self):
class C(object):
@@ -521,7 +558,7 @@ class TestCopy(unittest.TestCase):
x = C()
x.foo = [42]
y = copy.copy(x)
- self.assertTrue(x.foo is y.foo)
+ self.assertIs(x.foo, y.foo)
def test_deepcopy_slots(self):
class C(object):
@@ -530,7 +567,7 @@ class TestCopy(unittest.TestCase):
x.foo = [42]
y = copy.deepcopy(x)
self.assertEqual(x.foo, y.foo)
- self.assertTrue(x.foo is not y.foo)
+ self.assertIsNot(x.foo, y.foo)
def test_deepcopy_dict_subclass(self):
class C(dict):
@@ -547,7 +584,7 @@ class TestCopy(unittest.TestCase):
y = copy.deepcopy(x)
self.assertEqual(x, y)
self.assertEqual(x._keys, y._keys)
- self.assertTrue(x is not y)
+ self.assertIsNot(x, y)
x['bar'] = 1
self.assertNotEqual(x, y)
self.assertNotEqual(x._keys, y._keys)
@@ -560,8 +597,8 @@ class TestCopy(unittest.TestCase):
y = copy.copy(x)
self.assertEqual(list(x), list(y))
self.assertEqual(x.foo, y.foo)
- self.assertTrue(x[0] is y[0])
- self.assertTrue(x.foo is y.foo)
+ self.assertIs(x[0], y[0])
+ self.assertIs(x.foo, y.foo)
def test_deepcopy_list_subclass(self):
class C(list):
@@ -571,8 +608,8 @@ class TestCopy(unittest.TestCase):
y = copy.deepcopy(x)
self.assertEqual(list(x), list(y))
self.assertEqual(x.foo, y.foo)
- self.assertTrue(x[0] is not y[0])
- self.assertTrue(x.foo is not y.foo)
+ self.assertIsNot(x[0], y[0])
+ self.assertIsNot(x.foo, y.foo)
def test_copy_tuple_subclass(self):
class C(tuple):
@@ -589,8 +626,8 @@ class TestCopy(unittest.TestCase):
self.assertEqual(tuple(x), ([1, 2], 3))
y = copy.deepcopy(x)
self.assertEqual(tuple(y), ([1, 2], 3))
- self.assertTrue(x is not y)
- self.assertTrue(x[0] is not y[0])
+ self.assertIsNot(x, y)
+ self.assertIsNot(x[0], y[0])
def test_getstate_exc(self):
class EvilState(object):
@@ -618,10 +655,10 @@ class TestCopy(unittest.TestCase):
obj = C()
x = weakref.ref(obj)
y = _copy(x)
- self.assertTrue(y is x)
+ self.assertIs(y, x)
del obj
y = _copy(x)
- self.assertTrue(y is x)
+ self.assertIs(y, x)
def test_copy_weakref(self):
self._check_weakref(copy.copy)
@@ -637,7 +674,7 @@ class TestCopy(unittest.TestCase):
u[a] = b
u[c] = d
v = copy.copy(u)
- self.assertFalse(v is u)
+ self.assertIsNot(v, u)
self.assertEqual(v, u)
self.assertEqual(v[a], b)
self.assertEqual(v[c], d)
@@ -667,8 +704,8 @@ class TestCopy(unittest.TestCase):
v = copy.deepcopy(u)
self.assertNotEqual(v, u)
self.assertEqual(len(v), 2)
- self.assertFalse(v[a] is b)
- self.assertFalse(v[c] is d)
+ self.assertIsNot(v[a], b)
+ self.assertIsNot(v[c], d)
self.assertEqual(v[a].i, b.i)
self.assertEqual(v[c].i, d.i)
del c
@@ -687,12 +724,12 @@ class TestCopy(unittest.TestCase):
self.assertNotEqual(v, u)
self.assertEqual(len(v), 2)
(x, y), (z, t) = sorted(v.items(), key=lambda pair: pair[0].i)
- self.assertFalse(x is a)
+ self.assertIsNot(x, a)
self.assertEqual(x.i, a.i)
- self.assertTrue(y is b)
- self.assertFalse(z is c)
+ self.assertIs(y, b)
+ self.assertIsNot(z, c)
self.assertEqual(z.i, c.i)
- self.assertTrue(t is d)
+ self.assertIs(t, d)
del x, y, z, t
del d
self.assertEqual(len(v), 1)
@@ -705,7 +742,7 @@ class TestCopy(unittest.TestCase):
f.b = f.m
g = copy.deepcopy(f)
self.assertEqual(g.m, g.b)
- self.assertTrue(g.b.__self__ is g)
+ self.assertIs(g.b.__self__, g)
g.b()
diff --git a/Lib/test/test_cprofile.py b/Lib/test/test_cprofile.py
index ae17c2b..5676668 100644
--- a/Lib/test/test_cprofile.py
+++ b/Lib/test/test_cprofile.py
@@ -18,16 +18,19 @@ class CProfileTest(ProfileTest):
def test_bad_counter_during_dealloc(self):
import _lsprof
# Must use a file as StringIO doesn't trigger the bug.
- with open(TESTFN, 'w') as file:
- sys.stderr = file
- try:
- obj = _lsprof.Profiler(lambda: int)
- obj.enable()
- obj = _lsprof.Profiler(1)
- obj.disable()
- finally:
- sys.stderr = sys.__stderr__
- unlink(TESTFN)
+ orig_stderr = sys.stderr
+ try:
+ with open(TESTFN, 'w') as file:
+ sys.stderr = file
+ try:
+ obj = _lsprof.Profiler(lambda: int)
+ obj.enable()
+ obj = _lsprof.Profiler(1)
+ obj.disable()
+ finally:
+ sys.stderr = orig_stderr
+ finally:
+ unlink(TESTFN)
def test_main():
diff --git a/Lib/test/test_crashers.py b/Lib/test/test_crashers.py
new file mode 100644
index 0000000..336ccbe
--- /dev/null
+++ b/Lib/test/test_crashers.py
@@ -0,0 +1,38 @@
+# Tests that the crashers in the Lib/test/crashers directory actually
+# do crash the interpreter as expected
+#
+# If a crasher is fixed, it should be moved elsewhere in the test suite to
+# ensure it continues to work correctly.
+
+import unittest
+import glob
+import os.path
+import test.support
+from test.script_helper import assert_python_failure
+
+CRASHER_DIR = os.path.join(os.path.dirname(__file__), "crashers")
+CRASHER_FILES = os.path.join(CRASHER_DIR, "*.py")
+
+infinite_loops = ["infinite_loop_re.py", "nasty_eq_vs_dict.py"]
+
+class CrasherTest(unittest.TestCase):
+
+ @unittest.skip("these tests are too fragile")
+ @test.support.cpython_only
+ def test_crashers_crash(self):
+ for fname in glob.glob(CRASHER_FILES):
+ if os.path.basename(fname) in infinite_loops:
+ continue
+ # Some "crashers" only trigger an exception rather than a
+ # segfault. Consider that an acceptable outcome.
+ if test.support.verbose:
+ print("Checking crasher:", fname)
+ assert_python_failure(fname)
+
+
+def test_main():
+ test.support.run_unittest(CrasherTest)
+ test.support.reap_children()
+
+if __name__ == "__main__":
+ test_main()
diff --git a/Lib/test/test_crypt.py b/Lib/test/test_crypt.py
index 2adb28d..dc107d8 100644
--- a/Lib/test/test_crypt.py
+++ b/Lib/test/test_crypt.py
@@ -10,6 +10,25 @@ class CryptTestCase(unittest.TestCase):
if support.verbose:
print('Test encryption: ', c)
+ def test_salt(self):
+ self.assertEqual(len(crypt._saltchars), 64)
+ for method in crypt.methods:
+ salt = crypt.mksalt(method)
+ self.assertEqual(len(salt),
+ method.salt_chars + (3 if method.ident else 0))
+
+ def test_saltedcrypt(self):
+ for method in crypt.methods:
+ pw = crypt.crypt('assword', method)
+ self.assertEqual(len(pw), method.total_size)
+ pw = crypt.crypt('assword', crypt.mksalt(method))
+ self.assertEqual(len(pw), method.total_size)
+
+ def test_methods(self):
+ # Guarantee that METHOD_CRYPT is the last method in crypt.methods.
+ self.assertTrue(len(crypt.methods) >= 1)
+ self.assertEqual(crypt.METHOD_CRYPT, crypt.methods[-1])
+
def test_main():
support.run_unittest(CryptTestCase)
diff --git a/Lib/test/test_curses.py b/Lib/test/test_curses.py
index 5812147..e959622 100644
--- a/Lib/test/test_curses.py
+++ b/Lib/test/test_curses.py
@@ -264,11 +264,53 @@ def test_issue6243(stdscr):
curses.ungetch(1025)
stdscr.getkey()
+def test_unget_wch(stdscr):
+ if not hasattr(curses, 'unget_wch'):
+ return
+ encoding = stdscr.encoding
+ for ch in ('a', '\xe9', '\u20ac', '\U0010FFFF'):
+ try:
+ ch.encode(encoding)
+ except UnicodeEncodeError:
+ continue
+ try:
+ curses.unget_wch(ch)
+ except Exception as err:
+ raise Exception("unget_wch(%a) failed with encoding %s: %s"
+ % (ch, stdscr.encoding, err))
+ read = stdscr.get_wch()
+ if read != ch:
+ raise AssertionError("%r != %r" % (read, ch))
+
+ code = ord(ch)
+ curses.unget_wch(code)
+ read = stdscr.get_wch()
+ if read != ch:
+ raise AssertionError("%r != %r" % (read, ch))
+
def test_issue10570():
b = curses.tparm(curses.tigetstr("cup"), 5, 3)
assert type(b) is bytes
curses.putp(b)
+def test_encoding(stdscr):
+ import codecs
+ encoding = stdscr.encoding
+ codecs.lookup(encoding)
+ try:
+ stdscr.encoding = 10
+ except TypeError:
+ pass
+ else:
+ raise AssertionError("TypeError not raised")
+ stdscr.encoding = encoding
+ try:
+ del stdscr.encoding
+ except TypeError:
+ pass
+ else:
+ raise AssertionError("TypeError not raised")
+
def main(stdscr):
curses.savetty()
try:
@@ -277,16 +319,18 @@ def main(stdscr):
test_userptr_without_set(stdscr)
test_resize_term(stdscr)
test_issue6243(stdscr)
+ test_unget_wch(stdscr)
test_issue10570()
+ test_encoding(stdscr)
finally:
curses.resetty()
def test_main():
- if not sys.stdout.isatty():
- raise unittest.SkipTest("sys.stdout is not a tty")
+ if not sys.__stdout__.isatty():
+ raise unittest.SkipTest("sys.__stdout__ is not a tty")
# testing setupterm() inside initscr/endwin
# causes terminal breakage
- curses.setupterm(fd=sys.stdout.fileno())
+ curses.setupterm(fd=sys.__stdout__.fileno())
try:
stdscr = curses.initscr()
main(stdscr)
diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py