summaryrefslogtreecommitdiffstats
path: root/Lib/compiler/pycodegen.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/compiler/pycodegen.py')
-rw-r--r--Lib/compiler/pycodegen.py5
1 files changed, 5 insertions, 0 deletions
diff --git a/Lib/compiler/pycodegen.py b/Lib/compiler/pycodegen.py
index bfb5059..0097482 100644
--- a/Lib/compiler/pycodegen.py
+++ b/Lib/compiler/pycodegen.py
@@ -909,6 +909,11 @@ class CodeGenerator:
self.visit(node.value)
self.emit('RETURN_VALUE')
+ def visitYield(self, node):
+ self.set_lineno(node)
+ self.visit(node.value)
+ self.emit('YIELD_STMT')
+
# slice and subscript stuff
def visitSlice(self, node, aug_flag=None):
oc/tut/tut.tex | 72 +- Doc/whatsnew/whatsnew20.tex | 4 +- Doc/whatsnew/whatsnew21.tex | 17 +- Doc/whatsnew/whatsnew24.tex | 8 +- Doc/whatsnew/whatsnew25.tex | 1451 +++++- Grammar/Grammar | 7 +- Include/Python-ast.h | 46 +- Include/abstract.h | 10 + Include/asdl.h | 8 + Include/code.h | 17 +- Include/compile.h | 2 +- Include/genobject.h | 2 + Include/modsupport.h | 7 +- Include/object.h | 41 +- Include/objimpl.h | 22 +- Include/pymem.h | 17 +- Include/pyport.h | 60 +- Include/setobject.h | 3 + Include/sliceobject.h | 1 + LICENSE | 2 + Lib/Queue.py | 46 + Lib/SimpleXMLRPCServer.py | 8 +- Lib/__future__.py | 4 +- Lib/_threading_local.py | 72 +- Lib/bdb.py | 6 +- Lib/bsddb/__init__.py | 18 +- Lib/bsddb/test/test_all.py | 1 + Lib/bsddb/test/test_pickle.py | 75 + Lib/calendar.py | 709 ++- Lib/codecs.py | 36 +- Lib/contextlib.py | 20 +- Lib/copy_reg.py | 15 +- Lib/ctypes/__init__.py | 31 +- Lib/ctypes/_loader.py | 12 +- Lib/ctypes/test/test_byteswap.py | 70 +- Lib/ctypes/test/test_cfuncs.py | 2 +- Lib/ctypes/test/test_keeprefs.py | 5 + Lib/ctypes/test/test_loading.py | 80 +- Lib/ctypes/test/test_pointers.py | 12 + Lib/ctypes/test/test_posix.py | 6 +- Lib/ctypes/test/test_prototypes.py | 16 +- Lib/ctypes/test/test_random_things.py | 10 +- Lib/ctypes/test/test_sizes.py | 3 + Lib/ctypes/test/test_unaligned_structures.py | 45 + Lib/distutils/command/build_ext.py | 13 +- Lib/distutils/command/install.py | 1 + Lib/distutils/command/install_egg_info.py | 75 + Lib/distutils/command/upload.py | 11 +- Lib/distutils/log.py | 7 +- Lib/distutils/sysconfig.py | 21 +- Lib/doctest.py | 26 +- Lib/dummy_thread.py | 8 + Lib/easy_install.py | 5 + Lib/email/Charset.py | 370 -- Lib/email/Encoders.py | 78 - Lib/email/Errors.py | 53 - Lib/email/FeedParser.py | 477 -- Lib/email/Generator.py | 352 -- Lib/email/Header.py | 495 -- Lib/email/Iterators.py | 67 - Lib/email/MIMEAudio.py | 72 - Lib/email/MIMEBase.py | 24 - Lib/email/MIMEImage.py | 45 - Lib/email/MIMEMessage.py | 32 - Lib/email/MIMEMultipart.py | 39 - Lib/email/MIMENonMultipart.py | 24 - Lib/email/MIMEText.py | 28 - Lib/email/Message.py | 814 --- Lib/email/Parser.py | 88 - Lib/email/Utils.py | 291 -- Lib/email/__init__.py | 77 +- Lib/email/_parseaddr.py | 10 +- Lib/email/base64MIME.py | 172 - Lib/email/base64mime.py | 184 + Lib/email/charset.py | 388 ++ Lib/email/encoders.py | 88 + Lib/email/errors.py | 57 + Lib/email/feedparser.py | 480 ++ Lib/email/generator.py | 348 ++ Lib/email/header.py | 502 ++ Lib/email/iterators.py | 73 + Lib/email/message.py | 773 +++ Lib/email/mime/__init__.py | 0 Lib/email/mime/application.py | 36 + Lib/email/mime/audio.py | 73 + Lib/email/mime/base.py | 26 + Lib/email/mime/image.py | 46 + Lib/email/mime/message.py | 34 + Lib/email/mime/multipart.py | 41 + Lib/email/mime/nonmultipart.py | 26 + Lib/email/mime/text.py | 30 + Lib/email/parser.py | 91 + Lib/email/quopriMIME.py | 318 -- Lib/email/quoprimime.py | 336 ++ Lib/email/test/test_email.py | 129 +- Lib/email/test/test_email_codecs.py | 7 + Lib/email/test/test_email_codecs_renamed.py | 77 + Lib/email/test/test_email_renamed.py | 3078 ++++++++++++ Lib/email/utils.py | 306 ++ Lib/encodings/big5.py | 41 +- Lib/encodings/big5hkscs.py | 41 +- Lib/encodings/cp932.py | 41 +- Lib/encodings/cp949.py | 41 +- Lib/encodings/cp950.py | 41 +- Lib/encodings/euc_jis_2004.py | 41 +- Lib/encodings/euc_jisx0213.py | 41 +- Lib/encodings/euc_jp.py | 41 +- Lib/encodings/euc_kr.py | 41 +- Lib/encodings/gb18030.py | 41 +- Lib/encodings/gb2312.py | 41 +- Lib/encodings/gbk.py | 41 +- Lib/encodings/hz.py | 41 +- Lib/encodings/idna.py | 100 +- Lib/encodings/iso2022_jp.py | 41 +- Lib/encodings/iso2022_jp_1.py | 41 +- Lib/encodings/iso2022_jp_2.py | 41 +- Lib/encodings/iso2022_jp_2004.py | 41 +- Lib/encodings/iso2022_jp_3.py | 41 +- Lib/encodings/iso2022_jp_ext.py | 41 +- Lib/encodings/iso2022_kr.py | 41 +- Lib/encodings/johab.py | 41 +- Lib/encodings/shift_jis.py | 41 +- Lib/encodings/shift_jis_2004.py | 41 +- Lib/encodings/shift_jisx0213.py | 41 +- Lib/getpass.py | 25 +- Lib/glob.py | 2 +- Lib/idlelib/IOBinding.py | 1 + Lib/idlelib/NEWS.txt | 11 +- Lib/idlelib/idlever.py | 2 +- Lib/inspect.py | 6 +- Lib/lib-old/Para.py | 343 -- Lib/lib-old/addpack.py | 67 - Lib/lib-old/cmp.py | 63 - Lib/lib-old/cmpcache.py | 64 - Lib/lib-old/codehack.py | 81 - Lib/lib-old/dircmp.py | 202 - Lib/lib-old/dump.py | 63 - Lib/lib-old/find.py | 26 - Lib/lib-old/fmt.py | 623 --- Lib/lib-old/grep.py | 79 - Lib/lib-old/lockfile.py | 15 - Lib/lib-old/newdir.py | 73 - Lib/lib-old/ni.py | 433 -- Lib/lib-old/packmail.py | 111 - Lib/lib-old/poly.py | 52 - Lib/lib-old/rand.py | 13 - Lib/lib-old/statcache.py | 82 - Lib/lib-old/tb.py | 177 - Lib/lib-old/tzparse.py | 98 - Lib/lib-old/util.py | 25 - Lib/lib-old/whatsound.py | 1 - Lib/lib-old/whrandom.py | 144 - Lib/lib-old/zmod.py | 94 - Lib/lib-tk/Tix.py | 98 +- Lib/lib-tk/Tkinter.py | 24 +- Lib/lib-tk/tkFont.py | 4 +- Lib/linecache.py | 36 +- Lib/logging/__init__.py | 2 +- Lib/mimetools.py | 5 +- Lib/mimetypes.py | 321 +- Lib/pdb.py | 125 +- Lib/pkg_resources.py | 2377 +++++++++ Lib/pkgutil.py | 425 ++ Lib/plat-mac/applesingle.py | 2 +- Lib/platform.py | 3 +- Lib/popen2.py | 37 +- Lib/pstats.py | 144 +- Lib/pydoc.py | 182 +- Lib/random.py | 34 +- Lib/reconvert.py | 192 - Lib/regex_syntax.py | 53 - Lib/regsub.py | 198 - Lib/rexec.py | 2 +- Lib/runpy.py | 377 +- Lib/setuptools.egg-info/PKG-INFO | 89 + Lib/setuptools.egg-info/entry_points.txt | 51 + Lib/setuptools.egg-info/top_level.txt | 3 + Lib/setuptools.egg-info/zip-safe | 0 Lib/setuptools/__init__.py | 64 + Lib/setuptools/archive_util.py | 200 + Lib/setuptools/cli.exe | Bin 0 -> 6144 bytes Lib/setuptools/command/__init__.py | 19 + Lib/setuptools/command/alias.py | 79 + Lib/setuptools/command/bdist_egg.py | 449 ++ Lib/setuptools/command/bdist_rpm.py | 37 + Lib/setuptools/command/build_ext.py | 285 ++ Lib/setuptools/command/build_py.py | 192 + Lib/setuptools/command/develop.py | 116 + Lib/setuptools/command/easy_install.py | 1555 ++++++ Lib/setuptools/command/egg_info.py | 365 ++ Lib/setuptools/command/install.py | 101 + Lib/setuptools/command/install_egg_info.py | 81 + Lib/setuptools/command/install_lib.py | 76 + Lib/setuptools/command/install_scripts.py | 56 + Lib/setuptools/command/rotate.py | 57 + Lib/setuptools/command/saveopts.py | 24 + Lib/setuptools/command/sdist.py | 163 + Lib/setuptools/command/setopt.py | 158 + Lib/setuptools/command/test.py | 119 + Lib/setuptools/command/upload.py | 178 + Lib/setuptools/depends.py | 239 + Lib/setuptools/dist.py | 798 +++ Lib/setuptools/extension.py | 35 + Lib/setuptools/gui.exe | Bin 0 -> 6144 bytes Lib/setuptools/package_index.py | 674 +++ Lib/setuptools/sandbox.py | 203 + Lib/setuptools/site-patch.py | 74 + Lib/setuptools/tests/__init__.py | 364 ++ Lib/setuptools/tests/api_tests.txt | 330 ++ Lib/setuptools/tests/test_resources.py | 483 ++ Lib/sgmllib.py | 34 +- Lib/site.py | 2 + Lib/smtplib.py | 6 +- Lib/socket.py | 12 +- Lib/sqlite3/__init__.py | 24 + Lib/sqlite3/dbapi2.py | 84 + Lib/sqlite3/test/__init__.py | 0 Lib/sqlite3/test/dbapi.py | 732 +++ Lib/sqlite3/test/factory.py | 164 + Lib/sqlite3/test/hooks.py | 115 + Lib/sqlite3/test/regression.py | 48 + Lib/sqlite3/test/transactions.py | 156 + Lib/sqlite3/test/types.py | 339 ++ Lib/sqlite3/test/userfunctions.py | 330 ++ Lib/sre.py | 10 + Lib/subprocess.py | 36 +- Lib/telnetlib.py | 2 +- Lib/test/check_soundcard.vbs | 13 + Lib/test/crashers/README | 5 + Lib/test/crashers/dictresize_attack.py | 32 + Lib/test/crashers/nasty_eq_vs_dict.py | 47 + Lib/test/empty.vbs | 1 + Lib/test/fork_wait.py | 71 + Lib/test/leakers/README.txt | 13 + Lib/test/leakers/test_ctypes.py | 16 + Lib/test/leakers/test_selftype.py | 13 + Lib/test/leakers/test_tee.py | 19 - Lib/test/output/test_augassign | 54 - Lib/test/output/test_coercion | 1054 ---- Lib/test/output/test_compare | 101 - Lib/test/regrtest.py | 67 +- Lib/test/test___all__.py | 4 - Lib/test/test_applesingle.py | 4 +- Lib/test/test_array.py | 2 +- Lib/test/test_ast.py | 11 +- Lib/test/test_audioop.py | 24 +- Lib/test/test_augassign.py | 566 ++- Lib/test/test_bsddb.py | 9 +- Lib/test/test_builtin.py | 4 + Lib/test/test_calendar.py | 265 +- Lib/test/test_capi.py | 87 +- Lib/test/test_cmd_line.py | 3 + Lib/test/test_codecs.py | 93 +- Lib/test/test_coercion.py | 324 +- Lib/test/test_compare.py | 51 +- Lib/test/test_compile.py | 4 + Lib/test/test_compiler.py | 13 +- Lib/test/test_contextlib.py | 69 +- Lib/test/test_copy_reg.py | 29 + Lib/test/test_curses.py | 15 +- Lib/test/test_datetime.py | 11 + Lib/test/test_decimal.py | 50 +- Lib/test/test_descr.py | 12 +- Lib/test/test_difflib.py | 8 +- Lib/test/test_dl.py | 1 + Lib/test/test_doctest.py | 46 +- Lib/test/test_email_renamed.py | 13 + Lib/test/test_file.py | 16 +- Lib/test/test_fileinput.py | 5 +- Lib/test/test_fork1.py | 76 +- Lib/test/test_generators.py | 98 +- Lib/test/test_genexps.py | 2 +- Lib/test/test_getargs2.py | 19 +- Lib/test/test_glob.py | 8 + Lib/test/test_grammar.py | 9 + Lib/test/test_index.py | 137 + Lib/test/test_inspect.py | 6 +- Lib/test/test_mimetypes.py | 1 + Lib/test/test_multibytecodec.py | 135 +- Lib/test/test_multibytecodec_support.py | 210 +- Lib/test/test_optparse.py | 7 + Lib/test/test_parser.py | 4 + Lib/test/test_platform.py | 7 +- Lib/test/test_popen2.py | 4 + Lib/test/test_posix.py | 5 + Lib/test/test_pty.py | 28 +- Lib/test/test_pyclbr.py | 5 +- Lib/test/test_queue.py | 44 + Lib/test/test_quopri.py | 21 +- Lib/test/test_random.py | 21 +- Lib/test/test_regex.py | 113 - Lib/test/test_set.py | 4 +- Lib/test/test_setuptools.py | 16 + Lib/test/test_sgmllib.py | 14 + Lib/test/test_socket.py | 40 +- Lib/test/test_socket_ssl.py | 39 + Lib/test/test_sqlite.py | 16 + Lib/test/test_startfile.py | 37 + Lib/test/test_sundry.py | 54 +- Lib/test/test_sys.py | 5 + Lib/test/test_timeout.py | 5 +- Lib/test/test_tokenize.py | 134 +- Lib/test/test_trace.py | 62 +- Lib/test/test_traceback.py | 10 + Lib/test/test_unicode.py | 16 + Lib/test/test_urllib2.py | 38 +- Lib/test/test_urlparse.py | 101 + Lib/test/test_wait3.py | 32 + Lib/test/test_wait4.py | 29 + Lib/test/test_warnings.py | 4 + Lib/test/test_winsound.py | 149 +- Lib/test/test_with.py | 56 + Lib/test/test_xrange.py | 7 +- Lib/test/test_zipimport.py | 83 +- Lib/tokenize.py | 2 +- Lib/traceback.py | 10 +- Lib/urllib.py | 4 +- Lib/urllib2.py | 205 +- Lib/urlparse.py | 131 +- Lib/uu.py | 4 +- Lib/warnings.py | 10 +- Lib/xmlcore/dom/expatbuilder.py | 14 +- Lib/xmlcore/dom/minicompat.py | 175 +- Lib/xmlcore/dom/minidom.py | 16 +- Lib/xmlcore/dom/xmlbuilder.py | 6 +- Mac/Modules/carbonevt/_CarbonEvtmodule.c | 4 +- Mac/Modules/cf/_CFmodule.c | 4 +- Mac/Modules/gestaltmodule.c | 9 +- Mac/Modules/qd/_Qdmodule.c | 5 +- Makefile.pre.in | 56 +- Misc/ACKS | 3 + Misc/BeOS-setup.py | 2 - Misc/HISTORY | 5287 ++++++++++++++++++++ Misc/README.valgrind | 6 + Misc/RPM/python-2.4.spec | 382 -- Misc/RPM/python-2.5.spec | 385 ++ Misc/SpecialBuilds.txt | 4 +- Misc/build.sh | 24 +- Misc/cheatsheet | 3 - Misc/developers.txt | 6 + Misc/python-config.in | 50 + Modules/Setup.dist | 4 +- Modules/_bsddb.c | 28 +- Modules/_csv.c | 63 +- Modules/_ctypes/_ctypes.c | 255 +- Modules/_ctypes/_ctypes_test.c | 72 +- Modules/_ctypes/callbacks.c | 52 +- Modules/_ctypes/callproc.c | 100 +- Modules/_ctypes/cfield.c | 155 +- Modules/_ctypes/ctypes.h | 39 +- Modules/_ctypes/libffi/configure | 1 + Modules/_ctypes/libffi/configure.ac | 1 + .../_ctypes/libffi/src/powerpc/darwin_closure.S | 2 +- Modules/_ctypes/libffi/src/x86/ffi.c | 4 +- Modules/_ctypes/stgdict.c | 45 +- Modules/_hotshot.c | 8 +- Modules/_lsprof.c | 10 +- Modules/_sqlite/adapters.c | 40 + Modules/_sqlite/adapters.h | 33 + Modules/_sqlite/cache.c | 362 ++ Modules/_sqlite/cache.h | 61 + Modules/_sqlite/connection.c | 1082 ++++ Modules/_sqlite/connection.h | 106 + Modules/_sqlite/converters.c | 40 + Modules/_sqlite/converters.h | 33 + Modules/_sqlite/cursor.c | 1027 ++++ Modules/_sqlite/cursor.h | 71 + Modules/_sqlite/microprotocols.c | 142 + Modules/_sqlite/microprotocols.h | 59 + Modules/_sqlite/module.c | 325 ++ Modules/_sqlite/module.h | 55 + Modules/_sqlite/prepare_protocol.c | 84 + Modules/_sqlite/prepare_protocol.h | 41 + Modules/_sqlite/row.c | 202 + Modules/_sqlite/row.h | 39 + Modules/_sqlite/sqlitecompat.h | 34 + Modules/_sqlite/statement.c | 427 ++ Modules/_sqlite/statement.h | 58 + Modules/_sqlite/util.c | 96 + Modules/_sqlite/util.h | 38 + Modules/_sre.c | 407 +- Modules/_ssl.c | 24 +- Modules/_testcapimodule.c | 114 +- Modules/_tkinter.c | 2 +- Modules/almodule.c | 4 +- Modules/arraymodule.c | 13 +- Modules/audioop.c | 2483 ++++----- Modules/cPickle.c | 92 +- Modules/cStringIO.c | 26 +- Modules/ccpython.cc | 11 - Modules/cjkcodecs/_codecs_cn.c | 5 +- Modules/cjkcodecs/multibytecodec.c | 1358 +++-- Modules/cjkcodecs/multibytecodec.h | 60 +- Modules/collectionsmodule.c | 21 +- Modules/config.c.in | 10 + Modules/datetimemodule.c | 39 +- Modules/functionalmodule.c | 2 +- Modules/gcmodule.c | 75 +- Modules/getpath.c | 13 +- Modules/grpmodule.c | 5 +- Modules/itertoolsmodule.c | 91 +- Modules/main.c | 13 +- Modules/md5.c | 381 ++ Modules/md5.h | 123 +- Modules/md5c.c | 289 -- Modules/md5module.c | 16 +- Modules/operator.c | 6 +- Modules/ossaudiodev.c | 16 +- Modules/parsermodule.c | 41 +- Modules/posixmodule.c | 424 +- Modules/pwdmodule.c | 6 +- Modules/pyexpat.c | 16 +- Modules/regexmodule.c | 690 --- Modules/regexpr.c | 2094 -------- Modules/regexpr.h | 155 - Modules/resource.c | 7 +- Modules/socketmodule.c | 76 +- Modules/spwdmodule.c | 6 +- Modules/stropmodule.c | 66 +- Modules/threadmodule.c | 121 +- Modules/timemodule.c | 9 +- Modules/unicodedata.c | 12 +- Modules/xxsubtype.c | 4 - Modules/zipimport.c | 11 +- Modules/zlibmodule.c | 4 +- Objects/abstract.c | 138 +- Objects/boolobject.c | 72 +- Objects/bufferobject.c | 4 +- Objects/cellobject.c | 25 +- Objects/classobject.c | 236 +- Objects/cobject.c | 41 +- Objects/codeobject.c | 133 + Objects/complexobject.c | 12 +- Objects/descrobject.c | 52 +- Objects/dictobject.c | 43 +- Objects/enumobject.c | 19 +- Objects/fileobject.c | 33 +- Objects/floatobject.c | 34 +- Objects/frameobject.c | 96 +- Objects/funcobject.c | 63 +- Objects/genobject.c | 80 +- Objects/intobject.c | 67 +- Objects/iterobject.c | 14 +- Objects/listobject.c | 201 +- Objects/longobject.c | 88 +- Objects/methodobject.c | 13 +- Objects/moduleobject.c | 3 +- Objects/object.c | 94 +- Objects/obmalloc.c | 2 +- Objects/rangeobject.c | 175 +- Objects/setobject.c | 123 +- Objects/sliceobject.c | 75 +- Objects/stringobject.c | 216 +- Objects/structseq.c | 10 +- Objects/tupleobject.c | 69 +- Objects/typeobject.c | 212 +- Objects/unicodeobject.c | 123 +- Objects/weakrefobject.c | 87 +- PC/VC6/pythoncore.dsp | 8 - PC/_subprocess.c | 2 +- PC/config.c | 2 - PC/dllbase_nt.txt | 1 + PC/os2emx/Makefile | 2 - PC/os2emx/config.c | 2 - PC/os2emx/python24.def | 13 - PC/os2vacpp/config.c | 2 - PC/os2vacpp/makefile | 28 - PC/os2vacpp/makefile.omk | 24 - PC/os2vacpp/python.def | 6 - PC/testpy.py | 10 +- PC/tix.diff | 108 - PCbuild/_ctypes.vcproj | 10 +- PCbuild/_sqlite3.vcproj | 289 ++ PCbuild/_ssl.mak | 6 +- PCbuild/db.build | 10 + PCbuild/make_buildinfo.c | 3 + PCbuild/pcbuild.sln | 14 + PCbuild/python.build | 21 + PCbuild/pythoncore.vcproj | 10 +- PCbuild/readme.txt | 195 +- Parser/Python.asdl | 7 +- Parser/asdl_c.py | 94 +- Parser/bitset.c | 4 +- Parser/firstsets.c | 7 +- Parser/grammar.c | 15 +- Parser/myreadline.c | 6 +- Parser/node.c | 2 +- Parser/parser.c | 6 +- Parser/pgen.c | 27 +- Parser/pgenmain.c | 6 +- Parser/tokenizer.c | 110 +- Python/Python-ast.c | 313 +- Python/asdl.c | 19 +- Python/ast.c | 179 +- Python/bltinmodule.c | 45 +- Python/ceval.c | 150 +- Python/codecs.c | 165 +- Python/compile.c | 135 +- Python/dynload_win.c | 6 + Python/errors.c | 11 + Python/exceptions.c | 9 +- Python/future.c | 10 +- Python/getargs.c | 33 +- Python/getmtime.c | 9 + Python/getopt.c | 9 + Python/graminit.c | 4 +- Python/import.c | 25 +- Python/mactoolboxglue.c | 8 +- Python/modsupport.c | 98 +- Python/pyarena.c | 46 +- Python/pystate.c | 71 +- Python/pystrtod.c | 2 +- Python/pythonrun.c | 147 +- Python/symtable.c | 24 +- Python/sysmodule.c | 20 +- Python/thread_pthread.h | 12 +- Python/traceback.c | 20 +- README | 690 +-- RISCOS/Makefile | 5 - Tools/bgen/bgen/bgenObjectDefinition.py | 5 +- Tools/buildbot/build.bat | 1 + Tools/buildbot/external.bat | 21 + Tools/buildbot/kill_python.bat | 3 + Tools/buildbot/kill_python.c | 56 + Tools/buildbot/kill_python.mak | 2 + Tools/i18n/msgfmt.py | 2 +- Tools/msi/msi.py | 51 +- Tools/msi/uuids.py | 33 + Tools/pybench/Arithmetic.py | 778 +++ Tools/pybench/Calls.py | 410 ++ Tools/pybench/CommandLine.py | 634 +++ Tools/pybench/Constructs.py | 565 +++ Tools/pybench/Dict.py | 503 ++ Tools/pybench/Exceptions.py | 681 +++ Tools/pybench/Imports.py | 139 + Tools/pybench/Instances.py | 68 + Tools/pybench/LICENSE | 25 + Tools/pybench/Lists.py | 292 ++ Tools/pybench/Lookups.py | 946 ++++ Tools/pybench/Numbers.py | 784 +++ Tools/pybench/README | 372 ++ Tools/pybench/Setup.py | 35 + Tools/pybench/Strings.py | 564 +++ Tools/pybench/Tuples.py | 365 ++ Tools/pybench/Unicode.py | 542 ++ Tools/pybench/package/__init__.py | 0 Tools/pybench/package/submodule.py | 0 Tools/pybench/pybench.py | 461 ++ Tools/scripts/byext.py | 16 +- Tools/scripts/classfix.py | 12 +- Tools/scripts/fixcid.py | 12 +- Tools/scripts/ifdef.py | 1 - Tools/scripts/methfix.py | 10 +- Tools/scripts/objgraph.py | 4 +- Tools/scripts/pathfix.py | 4 +- Tools/scripts/pdeps.py | 6 +- Tools/unicode/Makefile | 5 +- Tools/unicode/gencjkcodecs.py | 68 + Tools/unicode/gencodec.py | 2 +- configure | 1231 ++--- configure.in | 148 +- pyconfig.h.in | 6 + setup.py | 367 +- 660 files changed, 56521 insertions(+), 22689 deletions(-) create mode 100644 Demo/tix/grid.py create mode 100644 Doc/lib/libcontextlib.tex delete mode 100644 Doc/lib/libreconvert.tex delete mode 100644 Doc/lib/libregex.tex delete mode 100644 Doc/lib/libregsub.tex create mode 100644 Doc/lib/librunpy.tex create mode 100644 Lib/bsddb/test/test_pickle.py create mode 100644 Lib/ctypes/test/test_unaligned_structures.py create mode 100644 Lib/distutils/command/install_egg_info.py create mode 100644 Lib/easy_install.py delete mode 100644 Lib/email/Charset.py delete mode 100644 Lib/email/Encoders.py delete mode 100644 Lib/email/Errors.py delete mode 100644 Lib/email/FeedParser.py delete mode 100644 Lib/email/Generator.py delete mode 100644 Lib/email/Header.py delete mode 100644 Lib/email/Iterators.py delete mode 100644 Lib/email/MIMEAudio.py delete mode 100644 Lib/email/MIMEBase.py delete mode 100644 Lib/email/MIMEImage.py delete mode 100644 Lib/email/MIMEMessage.py delete mode 100644 Lib/email/MIMEMultipart.py delete mode 100644 Lib/email/MIMENonMultipart.py delete mode 100644 Lib/email/MIMEText.py delete mode 100644 Lib/email/Message.py delete mode 100644 Lib/email/Parser.py delete mode 100644 Lib/email/Utils.py delete mode 100644 Lib/email/base64MIME.py create mode 100644 Lib/email/base64mime.py create mode 100644 Lib/email/charset.py create mode 100644 Lib/email/encoders.py create mode 100644 Lib/email/errors.py create mode 100644 Lib/email/feedparser.py create mode 100644 Lib/email/generator.py create mode 100644 Lib/email/header.py create mode 100644 Lib/email/iterators.py create mode 100644 Lib/email/message.py create mode 100644 Lib/email/mime/__init__.py create mode 100644 Lib/email/mime/application.py create mode 100644 Lib/email/mime/audio.py create mode 100644 Lib/email/mime/base.py create mode 100644 Lib/email/mime/image.py create mode 100644 Lib/email/mime/message.py create mode 100644 Lib/email/mime/multipart.py create mode 100644 Lib/email/mime/nonmultipart.py create mode 100644 Lib/email/mime/text.py create mode 100644 Lib/email/parser.py delete mode 100644 Lib/email/quopriMIME.py create mode 100644 Lib/email/quoprimime.py create mode 100644 Lib/email/test/test_email_codecs_renamed.py create mode 100644 Lib/email/test/test_email_renamed.py create mode 100644 Lib/email/utils.py delete mode 100644 Lib/lib-old/Para.py delete mode 100644 Lib/lib-old/addpack.py delete mode 100644 Lib/lib-old/cmp.py delete mode 100644 Lib/lib-old/cmpcache.py delete mode 100644 Lib/lib-old/codehack.py delete mode 100644 Lib/lib-old/dircmp.py delete mode 100644 Lib/lib-old/dump.py delete mode 100644 Lib/lib-old/find.py delete mode 100644 Lib/lib-old/fmt.py delete mode 100644 Lib/lib-old/grep.py delete mode 100644 Lib/lib-old/lockfile.py delete mode 100644 Lib/lib-old/newdir.py delete mode 100644 Lib/lib-old/ni.py delete mode 100644 Lib/lib-old/packmail.py delete mode 100644 Lib/lib-old/poly.py delete mode 100644 Lib/lib-old/rand.py delete mode 100644 Lib/lib-old/statcache.py delete mode 100644 Lib/lib-old/tb.py delete mode 100644 Lib/lib-old/tzparse.py delete mode 100644 Lib/lib-old/util.py delete mode 100644 Lib/lib-old/whatsound.py delete mode 100644 Lib/lib-old/whrandom.py delete mode 100644 Lib/lib-old/zmod.py create mode 100644 Lib/pkg_resources.py delete mode 100755 Lib/reconvert.py delete mode 100644 Lib/regex_syntax.py delete mode 100644 Lib/regsub.py create mode 100644 Lib/setuptools.egg-info/PKG-INFO create mode 100755 Lib/setuptools.egg-info/entry_points.txt create mode 100644 Lib/setuptools.egg-info/top_level.txt create mode 100644 Lib/setuptools.egg-info/zip-safe create mode 100644 Lib/setuptools/__init__.py create mode 100755 Lib/setuptools/archive_util.py create mode 100755 Lib/setuptools/cli.exe create mode 100644 Lib/setuptools/command/__init__.py create mode 100755 Lib/setuptools/command/alias.py create mode 100644 Lib/setuptools/command/bdist_egg.py create mode 100755 Lib/setuptools/command/bdist_rpm.py create mode 100644 Lib/setuptools/command/build_ext.py create mode 100644 Lib/setuptools/command/build_py.py create mode 100755 Lib/setuptools/command/develop.py create mode 100755 Lib/setuptools/command/easy_install.py create mode 100755 Lib/setuptools/command/egg_info.py create mode 100644 Lib/setuptools/command/install.py create mode 100755 Lib/setuptools/command/install_egg_info.py create mode 100644 Lib/setuptools/command/install_lib.py create mode 100755 Lib/setuptools/command/install_scripts.py create mode 100755 Lib/setuptools/command/rotate.py create mode 100755 Lib/setuptools/command/saveopts.py create mode 100755 Lib/setuptools/command/sdist.py create mode 100755 Lib/setuptools/command/setopt.py create mode 100644 Lib/setuptools/command/test.py create mode 100755 Lib/setuptools/command/upload.py create mode 100644 Lib/setuptools/depends.py create mode 100644 Lib/setuptools/dist.py create mode 100644 Lib/setuptools/extension.py create mode 100755 Lib/setuptools/gui.exe create mode 100755 Lib/setuptools/package_index.py create mode 100755 Lib/setuptools/sandbox.py create mode 100755 Lib/setuptools/site-patch.py create mode 100644 Lib/setuptools/tests/__init__.py create mode 100755 Lib/setuptools/tests/api_tests.txt create mode 100644 Lib/setuptools/tests/test_resources.py create mode 100644 Lib/sqlite3/__init__.py create mode 100644 Lib/sqlite3/dbapi2.py create mode 100644 Lib/sqlite3/test/__init__.py create mode 100644 Lib/sqlite3/test/dbapi.py create mode 100644 Lib/sqlite3/test/factory.py create mode 100644 Lib/sqlite3/test/hooks.py create mode 100644 Lib/sqlite3/test/regression.py create mode 100644 Lib/sqlite3/test/transactions.py create mode 100644 Lib/sqlite3/test/types.py create mode 100644 Lib/sqlite3/test/userfunctions.py create mode 100644 Lib/sre.py create mode 100644 Lib/test/check_soundcard.vbs create mode 100644 Lib/test/crashers/dictresize_attack.py create mode 100644 Lib/test/crashers/nasty_eq_vs_dict.py create mode 100644 Lib/test/empty.vbs create mode 100644 Lib/test/fork_wait.py create mode 100644 Lib/test/leakers/test_ctypes.py create mode 100644 Lib/test/leakers/test_selftype.py delete mode 100644 Lib/test/leakers/test_tee.py delete mode 100644 Lib/test/output/test_augassign delete mode 100644 Lib/test/output/test_coercion delete mode 100644 Lib/test/output/test_compare create mode 100644 Lib/test/test_email_renamed.py delete mode 100644 Lib/test/test_regex.py create mode 100644 Lib/test/test_setuptools.py create mode 100644 Lib/test/test_sqlite.py create mode 100644 Lib/test/test_startfile.py create mode 100644 Lib/test/test_wait3.py create mode 100644 Lib/test/test_wait4.py delete mode 100644 Misc/RPM/python-2.4.spec create mode 100644 Misc/RPM/python-2.5.spec create mode 100644 Misc/python-config.in create mode 100644 Modules/_sqlite/adapters.c create mode 100644 Modules/_sqlite/adapters.h create mode 100644 Modules/_sqlite/cache.c create mode 100644 Modules/_sqlite/cache.h create mode 100644 Modules/_sqlite/connection.c create mode 100644 Modules/_sqlite/connection.h create mode 100644 Modules/_sqlite/converters.c create mode 100644 Modules/_sqlite/converters.h create mode 100644 Modules/_sqlite/cursor.c create mode 100644 Modules/_sqlite/cursor.h create mode 100644 Modules/_sqlite/microprotocols.c create mode 100644 Modules/_sqlite/microprotocols.h create mode 100644 Modules/_sqlite/module.c create mode 100644 Modules/_sqlite/module.h create mode 100644 Modules/_sqlite/prepare_protocol.c create mode 100644 Modules/_sqlite/prepare_protocol.h create mode 100644 Modules/_sqlite/row.c create mode 100644 Modules/_sqlite/row.h create mode 100644 Modules/_sqlite/sqlitecompat.h create mode 100644 Modules/_sqlite/statement.c create mode 100644 Modules/_sqlite/statement.h create mode 100644 Modules/_sqlite/util.c create mode 100644 Modules/_sqlite/util.h delete mode 100644 Modules/ccpython.cc create mode 100644 Modules/md5.c delete mode 100644 Modules/md5c.c delete mode 100644 Modules/regexmodule.c delete mode 100644 Modules/regexpr.c delete mode 100644 Modules/regexpr.h delete mode 100644 PC/tix.diff create mode 100644 PCbuild/_sqlite3.vcproj create mode 100644 PCbuild/db.build create mode 100644 PCbuild/python.build create mode 100644 Tools/buildbot/kill_python.bat create mode 100644 Tools/buildbot/kill_python.c create mode 100644 Tools/buildbot/kill_python.mak create mode 100644 Tools/msi/uuids.py create mode 100644 Tools/pybench/Arithmetic.py create mode 100644 Tools/pybench/Calls.py create mode 100644 Tools/pybench/CommandLine.py create mode 100644 Tools/pybench/Constructs.py create mode 100644 Tools/pybench/Dict.py create mode 100644 Tools/pybench/Exceptions.py create mode 100644 Tools/pybench/Imports.py create mode 100644 Tools/pybench/Instances.py create mode 100644 Tools/pybench/LICENSE create mode 100644 Tools/pybench/Lists.py create mode 100644 Tools/pybench/Lookups.py create mode 100644 Tools/pybench/Numbers.py create mode 100644 Tools/pybench/README create mode 100644 Tools/pybench/Setup.py create mode 100644 Tools/pybench/Strings.py create mode 100644 Tools/pybench/Tuples.py create mode 100644 Tools/pybench/Unicode.py create mode 100644 Tools/pybench/package/__init__.py create mode 100644 Tools/pybench/package/submodule.py create mode 100755 Tools/pybench/pybench.py create mode 100644 Tools/unicode/gencjkcodecs.py diff --git a/Demo/parser/unparse.py b/Demo/parser/unparse.py index dd75c22..510cdb0 100644 --- a/Demo/parser/unparse.py +++ b/Demo/parser/unparse.py @@ -1,5 +1,8 @@ "Usage: unparse.py " import sys +import _ast +import cStringIO +import os class Unparser: """Methods in this class recursively traverse an AST and @@ -70,6 +73,18 @@ class Unparser: if a.asname: self.write(" as "+a.asname) + def _ImportFrom(self, t): + self.fill("from ") + self.write(t.module) + self.write(" import ") + for i, a in enumerate(t.names): + if i == 0: + self.write(", ") + self.write(a.name) + if a.asname: + self.write(" as "+a.asname) + # XXX(jpe) what is level for? + def _Assign(self, t): self.fill() for target in t.targets: @@ -88,6 +103,36 @@ class Unparser: if t.value: self.dispatch(t.value) + def _Pass(self, t): + self.fill("pass") + + def _Break(self, t): + self.fill("break") + + def _Continue(self, t): + self.fill("continue") + + def _Delete(self, t): + self.fill("del ") + self.dispatch(t.targets) + + def _Assert(self, t): + self.fill("assert ") + self.dispatch(t.test) + if t.msg: + self.write(", ") + self.dispatch(t.msg) + + def _Exec(self, t): + self.fill("exec ") + self.dispatch(t.body) + if t.globals: + self.write(" in ") + self.dispatch(t.globals) + if t.locals: + self.write(", ") + self.dispatch(t.locals) + def _Print(self, t): self.fill("print ") do_comma = False @@ -102,6 +147,67 @@ class Unparser: if not t.nl: self.write(",") + def _Global(self, t): + self.fill("global") + for i, n in enumerate(t.names): + if i != 0: + self.write(",") + self.write(" " + n) + + def _Yield(self, t): + self.fill("yield") + if t.value: + self.write(" (") + self.dispatch(t.value) + self.write(")") + + def _Raise(self, t): + self.fill('raise ') + if t.type: + self.dispatch(t.type) + if t.inst: + self.write(", ") + self.dispatch(t.inst) + if t.tback: + self.write(", ") + self.dispatch(t.tback) + + def _TryExcept(self, t): + self.fill("try") + self.enter() + self.dispatch(t.body) + self.leave() + + for ex in t.handlers: + self.dispatch(ex) + if t.orelse: + self.fill("else") + self.enter() + self.dispatch(t.orelse) + self.leave() + + def _TryFinally(self, t): + self.fill("try") + self.enter() + self.dispatch(t.body) + self.leave() + + self.fill("finally") + self.enter() + self.dispatch(t.finalbody) + self.leave() + + def _excepthandler(self, t): + self.fill("except ") + if t.type: + self.dispatch(t.type) + if t.name: + self.write(", ") + self.dispatch(t.name) + self.enter() + self.dispatch(t.body) + self.leave() + def _ClassDef(self, t): self.write("\n") self.fill("class "+t.name) @@ -119,9 +225,24 @@ class Unparser: self.write("\n") self.fill("def "+t.name + "(") self.dispatch(t.args) + self.write(")") + self.enter() + self.dispatch(t.body) + self.leave() + + def _For(self, t): + self.fill("for ") + self.dispatch(t.target) + self.write(" in ") + self.dispatch(t.iter) self.enter() self.dispatch(t.body) self.leave() + if t.orelse: + self.fill("else") + self.enter() + self.dispatch(t.orelse) + self.leave def _If(self, t): self.fill("if ") @@ -136,11 +257,9 @@ class Unparser: self.dispatch(t.orelse) self.leave() - def _For(self, t): - self.fill("for ") - self.dispatch(t.target) - self.write(" in ") - self.dispatch(t.iter) + def _While(self, t): + self.fill("while ") + self.dispatch(t.test) self.enter() self.dispatch(t.body) self.leave() @@ -150,6 +269,16 @@ class Unparser: self.dispatch(t.orelse) self.leave + def _With(self, t): + self.fill("with ") + self.dispatch(t.context_expr) + if t.optional_vars: + self.write(" as ") + self.dispatch(t.optional_vars) + self.enter() + self.dispatch(t.body) + self.leave() + # expr def _Str(self, tree): self.write(repr(tree.s)) @@ -157,6 +286,11 @@ class Unparser: def _Name(self, t): self.write(t.id) + def _Repr(self, t): + self.write("`") + self.dispatch(t.value) + self.write("`") + def _Num(self, t): self.write(repr(t.n)) @@ -167,6 +301,37 @@ class Unparser: self.write(", ") self.write("]") + def _ListComp(self, t): + self.write("[") + self.dispatch(t.elt) + for gen in t.generators: + self.dispatch(gen) + self.write("]") + + def _GeneratorExp(self, t): + self.write("(") + self.dispatch(t.elt) + for gen in t.generators: + self.dispatch(gen) + self.write(")") + + def _comprehension(self, t): + self.write(" for ") + self.dispatch(t.target) + self.write(" in ") + self.dispatch(t.iter) + for if_clause in t.ifs: + self.write(" if ") + self.dispatch(if_clause) + + def _IfExp(self, t): + self.dispatch(t.body) + self.write(" if ") + self.dispatch(t.test) + if t.orelse: + self.write(" else ") + self.dispatch(t.orelse) + def _Dict(self, t): self.write("{") for k,v in zip(t.keys, t.values): @@ -194,8 +359,8 @@ class Unparser: self.write(")") binop = { "Add":"+", "Sub":"-", "Mult":"*", "Div":"/", "Mod":"%", - "RShift":"<<", "BitOr":"|", "BitXor":"^", "BitAnd":"&", - "FloorDiv":"//"} + "LShift":">>", "RShift":"<<", "BitOr":"|", "BitXor":"^", "BitAnd":"&", + "FloorDiv":"//", "Pow": "**"} def _BinOp(self, t): self.write("(") self.dispatch(t.left) @@ -213,6 +378,15 @@ class Unparser: self.dispatch(e) self.write(")") + boolops = {_ast.And: 'and', _ast.Or: 'or'} + def _BoolOp(self, t): + self.write("(") + self.dispatch(t.values[0]) + for v in t.values[1:]: + self.write(" %s " % self.boolops[t.op.__class__]) + self.dispatch(v) + self.write(")") + def _Attribute(self,t): self.dispatch(t.value) self.write(".") @@ -234,12 +408,12 @@ class Unparser: if comma: self.write(", ") else: comma = True self.write("*") - self.dispatch(t.stararg) + self.dispatch(t.starargs) if t.kwargs: if comma: self.write(", ") else: comma = True self.write("**") - self.dispatch(t.stararg) + self.dispatch(t.kwargs) self.write(")") def _Subscript(self, t): @@ -249,6 +423,9 @@ class Unparser: self.write("]") # slice + def _Ellipsis(self, t): + self.write("...") + def _Index(self, t): self.dispatch(t.value) @@ -262,6 +439,12 @@ class Unparser: self.write(":") self.dispatch(t.step) + def _ExtSlice(self, t): + for i, d in enumerate(t.dims): + if i != 0: + self.write(': ') + self.dispatch(d) + # others def _arguments(self, t): first = True @@ -283,13 +466,51 @@ class Unparser: if t.kwarg: if first:first = False else: self.write(", ") - self.write("**"+self.kwarg) - self.write(")") + self.write("**"+t.kwarg) + + def _keyword(self, t): + self.write(t.arg) + self.write("=") + self.dispatch(t.value) + + def _Lambda(self, t): + self.write("lambda ") + self.dispatch(t.args) + self.write(": ") + self.dispatch(t.body) -def roundtrip(filename): +def roundtrip(filename, output=sys.stdout): source = open(filename).read() tree = compile(source, filename, "exec", 0x400) - Unparser(tree) + Unparser(tree, output) + + + +def testdir(a): + try: + names = [n for n in os.listdir(a) if n.endswith('.py')] + except OSError: + print >> sys.stderr, "Directory not readable: %s" % a + else: + for n in names: + fullname = os.path.join(a, n) + if os.path.isfile(fullname): + output = cStringIO.StringIO() + print 'Testing %s' % fullname + try: + roundtrip(fullname, output) + except Exception, e: + print ' Failed to compile, exception is %s' % repr(e) + elif os.path.isdir(fullname): + testdir(fullname) + +def main(args): + if args[0] == '--testdir': + for a in args[1:]: + testdir(a) + else: + for a in args: + roundtrip(a) if __name__=='__main__': - roundtrip(sys.argv[1]) + main(sys.argv[1:]) diff --git a/Demo/pdist/makechangelog.py b/Demo/pdist/makechangelog.py index b26f30b..1ffa588 100755 --- a/Demo/pdist/makechangelog.py +++ b/Demo/pdist/makechangelog.py @@ -6,7 +6,7 @@ import sys import string -import regex +import re import getopt import time @@ -35,9 +35,9 @@ def main(): for rev in allrevs: formatrev(rev, prefix) -parsedateprog = regex.compile( - '^date: \([0-9]+\)/\([0-9]+\)/\([0-9]+\) ' + - '\([0-9]+\):\([0-9]+\):\([0-9]+\); author: \([^ ;]+\)') +parsedateprog = re.compile( + '^date: ([0-9]+)/([0-9]+)/([0-9]+) ' + + '([0-9]+):([0-9]+):([0-9]+); author: ([^ ;]+)') authormap = { 'guido': 'Guido van Rossum ', @@ -70,7 +70,7 @@ def formatrev(rev, prefix): print print -startprog = regex.compile("^Working file: \(.*\)$") +startprog = re.compile("^Working file: (.*)$") def getnextfile(f): while 1: diff --git a/Demo/pdist/rcsbump b/Demo/pdist/rcsbump index e4e9ed5..4fa078e 100755 --- a/Demo/pdist/rcsbump +++ b/Demo/pdist/rcsbump @@ -6,12 +6,12 @@ # Python script for bumping up an RCS major revision number. import sys -import regex +import re import rcslib import string WITHLOCK = 1 -majorrev_re = regex.compile('^[0-9]+') +majorrev_re = re.compile('^[0-9]+') dir = rcslib.RCS() diff --git a/Demo/pdist/rcslib.py b/Demo/pdist/rcslib.py index d5f7b65..3e63869 100755 --- a/Demo/pdist/rcslib.py +++ b/Demo/pdist/rcslib.py @@ -8,7 +8,7 @@ files and (possibly) corresponding work files. import fnmatch import os -import regsub +import re import string import tempfile @@ -150,7 +150,7 @@ class RCS: cmd = 'ci %s%s -t%s %s %s' % \ (lockflag, rev, f.name, otherflags, name) else: - message = regsub.gsub('\([\\"$`]\)', '\\\\\\1', message) + message = re.sub(r'([\"$`])', r'\\\1', message) cmd = 'ci %s%s -m"%s" %s %s' % \ (lockflag, rev, message, otherflags, name) return self._system(cmd) diff --git a/Demo/scripts/eqfix.py b/Demo/scripts/eqfix.py index 165ca49..35c43aa 100755 --- a/Demo/scripts/eqfix.py +++ b/Demo/scripts/eqfix.py @@ -29,7 +29,7 @@ # into a program for a different change to Python programs... import sys -import regex +import re import os from stat import * import string @@ -53,7 +53,7 @@ def main(): if fix(arg): bad = 1 sys.exit(bad) -ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$') +ispythonprog = re.compile('^[a-zA-Z0-9_]+\.py$') def ispython(name): return ispythonprog.match(name) >= 0 @@ -104,7 +104,7 @@ def fix(filename): if lineno == 1 and g is None and line[:2] == '#!': # Check for non-Python scripts words = string.split(line[2:]) - if words and regex.search('[pP]ython', words[0]) < 0: + if words and re.search('[pP]ython', words[0]) < 0: msg = filename + ': ' + words[0] msg = msg + ' script; not fixed\n' err(msg) diff --git a/Demo/scripts/ftpstats.py b/Demo/scripts/ftpstats.py index b37a58d..5c1599e 100755 --- a/Demo/scripts/ftpstats.py +++ b/Demo/scripts/ftpstats.py @@ -13,12 +13,12 @@ import os import sys -import regex +import re import string import getopt -pat = '^\([a-zA-Z0-9 :]*\)!\(.*\)!\(.*\)!\([<>].*\)!\([0-9]+\)!\([0-9]+\)$' -prog = regex.compile(pat) +pat = '^([a-zA-Z0-9 :]*)!(.*)!(.*)!([<>].*)!([0-9]+)!([0-9]+)$' +prog = re.compile(pat) def main(): maxitems = 25 diff --git a/Demo/scripts/mboxconvert.py b/Demo/scripts/mboxconvert.py index 502d774..8c462f3 100755 --- a/Demo/scripts/mboxconvert.py +++ b/Demo/scripts/mboxconvert.py @@ -10,7 +10,7 @@ import time import os import stat import getopt -import regex +import re def main(): dofile = mmdf @@ -45,7 +45,7 @@ def main(): if sts: sys.exit(sts) -numeric = regex.compile('[1-9][0-9]*') +numeric = re.compile('[1-9][0-9]*') def mh(dir): sts = 0 diff --git a/Demo/scripts/update.py b/Demo/scripts/update.py index 32ad6c8..c936026 100755 --- a/Demo/scripts/update.py +++ b/Demo/scripts/update.py @@ -8,10 +8,10 @@ import os import sys -import regex +import re -pat = '^\([^: \t\n]+\):\([1-9][0-9]*\):' -prog = regex.compile(pat) +pat = '^([^: \t\n]+):([1-9][0-9]*):' +prog = re.compile(pat) class FileObj: def __init__(self, filename): diff --git a/Demo/sockets/mcast.py b/Demo/sockets/mcast.py index 122dad7..1abd305 100755 --- a/Demo/sockets/mcast.py +++ b/Demo/sockets/mcast.py @@ -13,7 +13,6 @@ MYGROUP = '225.0.0.250' import sys import time import struct -import regsub from socket import * diff --git a/Demo/tix/grid.py b/Demo/tix/grid.py new file mode 100644 index 0000000..07ca87f --- /dev/null +++ b/Demo/tix/grid.py @@ -0,0 +1,28 @@ +### +import Tix as tk +from pprint import pprint + +r= tk.Tk() +r.title("test") + +l=tk.Label(r, name="a_label") +l.pack() + +class MyGrid(tk.Grid): + def __init__(self, *args, **kwargs): + kwargs['editnotify']= self.editnotify + tk.Grid.__init__(self, *args, **kwargs) + def editnotify(self, x, y): + return True + +g = MyGrid(r, name="a_grid", +selectunit="cell") +g.pack(fill=tk.BOTH) +for x in xrange(5): + for y in xrange(5): + g.set(x,y,text=str((x,y))) + +c = tk.Button(r, text="Close", command=r.destroy) +c.pack() + +tk.mainloop() diff --git a/Demo/tkinter/guido/ManPage.py b/Demo/tkinter/guido/ManPage.py index 911961e..221af88 100755 --- a/Demo/tkinter/guido/ManPage.py +++ b/Demo/tkinter/guido/ManPage.py @@ -1,6 +1,6 @@ # Widget to display a man page -import regex +import re from Tkinter import * from Tkinter import _tkinter from ScrolledText import ScrolledText @@ -11,10 +11,10 @@ ITALICFONT = '*-Courier-Medium-O-Normal-*-120-*' # XXX Recognizing footers is system dependent # (This one works for IRIX 5.2 and Solaris 2.2) -footerprog = regex.compile( +footerprog = re.compile( '^ Page [1-9][0-9]*[ \t]+\|^.*Last change:.*[1-9][0-9]*\n') -emptyprog = regex.compile('^[ \t]*\n') -ulprog = regex.compile('^[ \t]*[Xv!_][Xv!_ \t]*\n') +emptyprog = re.compile('^[ \t]*\n') +ulprog = re.compile('^[ \t]*[Xv!_][Xv!_ \t]*\n') # Basic Man Page class -- does not disable editing class EditableManPage(ScrolledText): diff --git a/Demo/tkinter/guido/mbox.py b/Demo/tkinter/guido/mbox.py index 9b16f6b..3c36d88 100755 --- a/Demo/tkinter/guido/mbox.py +++ b/Demo/tkinter/guido/mbox.py @@ -4,7 +4,7 @@ import os import sys -import regex +import re import getopt import string import mhlib @@ -157,7 +157,7 @@ def scan_unpost(e): scanmenu.unpost() scanmenu.invoke('active') -scanparser = regex.compile('^ *\([0-9]+\)') +scanparser = re.compile('^ *([0-9]+)') def open_folder(e=None): global folder, mhf diff --git a/Demo/tkinter/guido/tkman.py b/Demo/tkinter/guido/tkman.py index 11d9690..6b0b641 100755 --- a/Demo/tkinter/guido/tkman.py +++ b/Demo/tkinter/guido/tkman.py @@ -5,7 +5,7 @@ import sys import os import string -import regex +import re from Tkinter import * from ManPage import ManPage @@ -208,15 +208,15 @@ class SelectionBox: print 'Empty search string' return if not self.casevar.get(): - map = regex.casefold + map = re.IGNORECASE else: map = None try: if map: - prog = regex.compile(search, map) + prog = re.compile(search, map) else: - prog = regex.compile(search) - except regex.error, msg: + prog = re.compile(search) + except re.error, msg: self.frame.bell() print 'Regex error:', msg return diff --git a/Doc/Makefile.deps b/Doc/Makefile.deps index 832402d..20c0688 100644 --- a/Doc/Makefile.deps +++ b/Doc/Makefile.deps @@ -126,6 +126,7 @@ LIBFILES= $(MANSTYLES) $(INDEXSTYLES) $(COMMONTEX) \ lib/libwarnings.tex \ lib/libimp.tex \ lib/libzipimport.tex \ + lib/librunpy.tex \ lib/libpkgutil.tex \ lib/libparser.tex \ lib/libbltin.tex \ diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex index 5521b80..1982bae 100644 --- a/Doc/api/concrete.tex +++ b/Doc/api/concrete.tex @@ -156,7 +156,7 @@ There is no \cfunction{PyNone_Check()} function for the same reason. Create a new integer object with a value of \var{ival}. The current implementation keeps an array of integer objects for all - integers between \code{-1} and \code{100}, when you create an int in + integers between \code{-5} and \code{256}, when you create an int in that range you actually just get back a reference to the existing object. So it should be possible to change the value of \code{1}. I suspect the behaviour of Python in this case is undefined. :-) @@ -333,7 +333,9 @@ booleans. The following macros are available, however. The pointer value can be retrieved from the resulting value using \cfunction{PyLong_AsVoidPtr()}. \versionadded{1.5.2} -\end{cfuncdesc} + \versionchanged[If the integer is larger than LONG_MAX, + a positive long integer is returned]{2.5} + \end{cfuncdesc} \begin{cfuncdesc}{long}{PyLong_AsLong}{PyObject *pylong} Return a C \ctype{long} representation of the contents of @@ -394,6 +396,8 @@ booleans. The following macros are available, however. produce a usable \ctype{void} pointer for values created with \cfunction{PyLong_FromVoidPtr()}. \versionadded{1.5.2} + \versionchanged[For values outside 0..LONG_MAX, both signed and + unsigned integers are acccepted]{2.5} \end{cfuncdesc} @@ -1803,8 +1807,9 @@ format. \begin{cfuncdesc}{PyObject*}{PyList_GetItem}{PyObject *list, Py_ssize_t index} Return the object at position \var{pos} in the list pointed to by - \var{p}. If \var{pos} is out of bounds, return \NULL{} and set an - \exception{IndexError} exception. + \var{p}. The position must be positive, indexing from the end of the + list is not supported. If \var{pos} is out of bounds, return \NULL{} + and set an \exception{IndexError} exception. \end{cfuncdesc} \begin{cfuncdesc}{PyObject*}{PyList_GET_ITEM}{PyObject *list, Py_ssize_t i} @@ -2264,8 +2269,8 @@ There are a few functions specific to Python functions. \begin{cfuncdesc}{PyObject*}{PyFunction_New}{PyObject *code, PyObject *globals} Return a new function object associated with the code object - \var{code}. \var{globals} must be a dictionary with the the global - varaibles accessible to the function. + \var{code}. \var{globals} must be a dictionary with the global + variables accessible to the function. The function's docstring, name and \var{__module__} are retrieved from the code object, the argument defaults and closure are set to @@ -2811,6 +2816,7 @@ rather than explicitly calling \cfunction{PyGen_New}. \begin{cfuncdesc}{PyObject*}{PyGen_New}{PyFrameObject *frame} Create and return a new generator object based on the \var{frame} object. + A reference to \var{frame} is stolen by this function. The parameter must not be \NULL{}. \end{cfuncdesc} @@ -3025,9 +3031,7 @@ or the abstract number protocol (including \cfunction{PyNumber_Or()}, \cfunction{PyNumber_Xor()}, \cfunction{PyNumber_InPlaceAdd()}, \cfunction{PyNumber_InPlaceSubtract()}, \cfunction{PyNumber_InPlaceOr()}, and \cfunction{PyNumber_InPlaceXor()}). -Note, \cfunction{PyNumber_InPlaceSubtract()} is also useful clearing -clearing a set (\code{s-=s}). - + \begin{ctypedesc}{PySetObject} This subtype of \ctype{PyObject} is used to hold the internal data for both \class{set} and \class{frozenset} objects. It is like a @@ -3111,7 +3115,6 @@ The following functions and macros are available for instances of \class{frozenset}, or an instance of a subtype. \end{cfuncdesc} - The following functions are available for instances of \class{set} or its subtypes but not for instances of \class{frozenset} or its subtypes. @@ -3142,4 +3145,6 @@ its subtypes but not for instances of \class{frozenset} or its subtypes. of \class{set} or its subtype. \end{cfuncdesc} - +\begin{cfuncdesc}{int}{PySet_Clear}{PyObject *set} + Empty an existing set of all elements. +\end{cfuncdesc} diff --git a/Doc/api/intro.tex b/Doc/api/intro.tex index 608d562..96f18ec 100644 --- a/Doc/api/intro.tex +++ b/Doc/api/intro.tex @@ -569,8 +569,11 @@ defined in \file{Modules/getpath.c}). Sometimes, it is desirable to ``uninitialize'' Python. For instance, the application may want to start over (make another call to \cfunction{Py_Initialize()}) or the application is simply done with its -use of Python and wants to free all memory allocated by Python. This +use of Python and wants to free memory allocated by Python. This can be accomplished by calling \cfunction{Py_Finalize()}. The function \cfunction{Py_IsInitialized()}\ttindex{Py_IsInitialized()} returns true if Python is currently in the initialized state. More information about these functions is given in a later chapter. +Notice that \cfunction{Py_Finalize} does \emph{not} free all memory +allocated by the Python interpreter, e.g. memory allocated by extension +modules currently cannot be released. diff --git a/Doc/api/memory.tex b/Doc/api/memory.tex index 3dbe9a5..4bc2c7a 100644 --- a/Doc/api/memory.tex +++ b/Doc/api/memory.tex @@ -195,9 +195,7 @@ free(buf1); /* Fatal -- should be PyMem_Del() */ In addition to the functions aimed at handling raw memory blocks from the Python heap, objects in Python are allocated and released with \cfunction{PyObject_New()}, \cfunction{PyObject_NewVar()} and -\cfunction{PyObject_Del()}, or with their corresponding macros -\cfunction{PyObject_NEW()}, \cfunction{PyObject_NEW_VAR()} and -\cfunction{PyObject_DEL()}. +\cfunction{PyObject_Del()}. These will be explained in the next chapter on defining and implementing new object types in C. diff --git a/Doc/api/newtypes.tex b/Doc/api/newtypes.tex index b7e25b9..2d758b0 100644 --- a/Doc/api/newtypes.tex +++ b/Doc/api/newtypes.tex @@ -62,23 +62,6 @@ defining new object types. after this call as the memory is no longer a valid Python object. \end{cfuncdesc} -\begin{cfuncdesc}{\var{TYPE}*}{PyObject_NEW}{TYPE, PyTypeObject *type} - Macro version of \cfunction{PyObject_New()}, to gain performance at - the expense of safety. This does not check \var{type} for a \NULL{} - value. -\end{cfuncdesc} - -\begin{cfuncdesc}{\var{TYPE}*}{PyObject_NEW_VAR}{TYPE, PyTypeObject *type, - Py_ssize_t size} - Macro version of \cfunction{PyObject_NewVar()}, to gain performance - at the expense of safety. This does not check \var{type} for a - \NULL{} value. -\end{cfuncdesc} - -\begin{cfuncdesc}{void}{PyObject_DEL}{PyObject *op} - Macro version of \cfunction{PyObject_Del()}. -\end{cfuncdesc} - \begin{cfuncdesc}{PyObject*}{Py_InitModule}{char *name, PyMethodDef *methods} Create a new module object based on a name and table of functions, diff --git a/Doc/api/refcounts.dat b/Doc/api/refcounts.dat index f3bd32e..7bba011 100644 --- a/Doc/api/refcounts.dat +++ b/Doc/api/refcounts.dat @@ -31,6 +31,9 @@ # The parameter names are as they appear in the API manual, not the source # code. +PyBool_FromLong:PyObject*::+1: +PyBool_FromLong:long:v:0: + PyBuffer_FromObject:PyObject*::+1: PyBuffer_FromObject:PyObject*:base:+1: PyBuffer_FromObject:int:offset:: @@ -110,6 +113,35 @@ PyComplex_ImagAsDouble:PyObject*:op:0: PyComplex_RealAsDouble:double::: PyComplex_RealAsDouble:PyObject*:op:0: +PyDate_FromDate:PyObject*::+1: +PyDate_FromDate:int:year:: +PyDate_FromDate:int:month:: +PyDate_FromDate:int:day:: + +PyDate_FromTimestamp:PyObject*::+1: +PyDate_FromTimestamp:PyObject*:args:0: + +PyDateTime_FromDateAndTime:PyObject*::+1: +PyDateTime_FromDateAndTime:int:year:: +PyDateTime_FromDateAndTime:int:month:: +PyDateTime_FromDateAndTime:int:day:: +PyDateTime_FromDateAndTime:int:hour:: +PyDateTime_FromDateAndTime:int:minute:: +PyDateTime_FromDateAndTime:int:second:: +PyDateTime_FromDateAndTime:int:usecond:: + +PyDateTime_FromTimestamp:PyObject*::+1: +PyDateTime_FromTimestamp:PyObject*:args:0: + +PyDelta_FromDSU:PyObject*::+1: +PyDelta_FromDSU:int:days:: +PyDelta_FromDSU:int:seconds:: +PyDelta_FromDSU:int:useconds:: + +PyDescr_NewClassMethod:PyObject*::+1: +PyDescr_NewClassMethod:PyTypeObject*:type:: +PyDescr_NewClassMethod:PyMethodDef*:method:: + PyDescr_NewGetSet:PyObject*::+1: PyDescr_NewGetSet:PyTypeObject*:type:: PyDescr_NewGetSet:PyGetSetDef*:getset:: @@ -226,6 +258,15 @@ PyErr_Restore:PyObject*:type:-1: PyErr_Restore:PyObject*:value:-1: PyErr_Restore:PyObject*:traceback:-1: +PyErr_SetExcFromWindowsErr:PyObject*::null: +PyErr_SetExcFromWindowsErr:PyObject*:type:0: +PyErr_SetExcFromWindowsErr:int:ierr:: + +PyErr_SetExcFromWindowsErrWithFilename:PyObject*::null: +PyErr_SetExcFromWindowsErrWithFilename:PyObject*:type:0: +PyErr_SetExcFromWindowsErrWithFilename:int:ierr:: +PyErr_SetExcFromWindowsErrWithFilename:char*:filename:: + PyErr_SetFromErrno:PyObject*::null: PyErr_SetFromErrno:PyObject*:type:0: @@ -337,6 +378,13 @@ PyFloat_Check:PyObject*:p:0: PyFloat_FromDouble:PyObject*::+1: PyFloat_FromDouble:double:v:: +PyFloat_FromString:PyObject*::+1: +PyFloat_FromString:PyObject*:str:0: +PyFloat_FromString:char**:pend:0:ignored + +PyFrozenSet_New:PyObject*::+1: +PyFrozenSet_New:PyObject*:iterable:0: + PyFunction_GetClosure:PyObject*::0: PyFunction_GetClosure:PyObject*:op:0: @@ -364,6 +412,9 @@ PyFunction_SetDefaults:int::: PyFunction_SetDefaults:PyObject*:op:0: PyFunction_SetDefaults:PyObject*:defaults:+1: +PyGen_New:PyObject*::+1: +PyGen_New:PyFrameObject*:frame:0: + Py_InitModule:PyObject*::0: Py_InitModule:char*:name:: Py_InitModule:PyMethodDef[]:methods:: @@ -432,6 +483,14 @@ PyInt_Check:PyObject*:op:0: PyInt_FromLong:PyObject*::+1: PyInt_FromLong:long:ival:: +PyInt_FromString:PyObject*::+1: +PyInt_FromString:char*:str:0: +PyInt_FromString:char**:pend:0: +PyInt_FromString:int:base:0: + +PyInt_FromSsize_t:PyObject*::+1: +PyInt_FromSsize_t:Py_ssize_t:ival:: + PyInt_GetMax:long::: PyInterpreterState_Clear:void::: @@ -939,6 +998,31 @@ PyRun_File:int:start:: PyRun_File:PyObject*:globals:0: PyRun_File:PyObject*:locals:0: +PyRun_FileEx:PyObject*::+1:??? -- same as eval_code2() +PyRun_FileEx:FILE*:fp:: +PyRun_FileEx:char*:filename:: +PyRun_FileEx:int:start:: +PyRun_FileEx:PyObject*:globals:0: +PyRun_FileEx:PyObject*:locals:0: +PyRun_FileEx:int:closeit:: + +PyRun_FileFlags:PyObject*::+1:??? -- same as eval_code2() +PyRun_FileFlags:FILE*:fp:: +PyRun_FileFlags:char*:filename:: +PyRun_FileFlags:int:start:: +PyRun_FileFlags:PyObject*:globals:0: +PyRun_FileFlags:PyObject*:locals:0: +PyRun_FileFlags:PyCompilerFlags*:flags:: + +PyRun_FileExFlags:PyObject*::+1:??? -- same as eval_code2() +PyRun_FileExFlags:FILE*:fp:: +PyRun_FileExFlags:char*:filename:: +PyRun_FileExFlags:int:start:: +PyRun_FileExFlags:PyObject*:globals:0: +PyRun_FileExFlags:PyObject*:locals:0: +PyRun_FileExFlags:int:closeit:: +PyRun_FileExFlags:PyCompilerFlags*:flags:: + PyRun_InteractiveLoop:int::: PyRun_InteractiveLoop:FILE*:fp:: PyRun_InteractiveLoop:char*:filename:: @@ -960,6 +1044,13 @@ PyRun_String:int:start:: PyRun_String:PyObject*:globals:0: PyRun_String:PyObject*:locals:0: +PyRun_StringFlags:PyObject*::+1:??? -- same as eval_code2() +PyRun_StringFlags:char*:str:: +PyRun_StringFlags:int:start:: +PyRun_StringFlags:PyObject*:globals:0: +PyRun_StringFlags:PyObject*:locals:0: +PyRun_StringFlags:PyCompilerFlags*:flags:: + PySeqIter_New:PyObject*::+1: PySeqIter_New:PyObject*:seq:: @@ -1053,6 +1144,9 @@ PySet_Discard:int::: PySet_Discard:PyObject*:set:0: PySet_Discard:PyObject*:key:-1:no effect if key not found +PySet_New:PyObject*::+1: +PySet_New:PyObject*:iterable:0: + PySet_Pop:PyObject*::0:or returns NULL and raises KeyError if set is empty PySet_Pop:PyObject*:set:0: @@ -1167,6 +1261,12 @@ PyThreadState_New:PyInterpreterState*:interp:: PyThreadState_Swap:PyThreadState*::: PyThreadState_Swap:PyThreadState*:tstate:: +PyTime_FromTime:PyObject*::+1: +PyTime_FromTime:int:hour:: +PyTime_FromTime:int:minute:: +PyTime_FromTime:int:second:: +PyTime_FromTime:int:usecond:: + PyTuple_Check:int::: PyTuple_Check:PyObject*:p:0: @@ -1186,6 +1286,10 @@ PyTuple_GetSlice:int:high:: PyTuple_New:PyObject*::+1: PyTuple_New:int:len:: +PyTuple_Pack:PyObject*::+1: +PyTuple_Pack:int:len:: +PyTuple_Pack:PyObject*:...:0: + PyTuple_SET_ITEM:void::: PyTuple_SET_ITEM:PyTupleObject*:p:0: PyTuple_SET_ITEM:int:pos:: @@ -1298,6 +1402,19 @@ PyUnicode_Decode:int:size:: PyUnicode_Decode:const char*:encoding:: PyUnicode_Decode:const char*:errors:: +PyUnicode_DecodeUTF16Stateful:PyObject*::+1: +PyUnicode_DecodeUTF16Stateful:const char*:s:: +PyUnicode_DecodeUTF16Stateful:int:size:: +PyUnicode_DecodeUTF16Stateful:const char*:errors:: +PyUnicode_DecodeUTF16Stateful:int*:byteorder:: +PyUnicode_DecodeUTF16Stateful:int*:consumed:: + +PyUnicode_DecodeUTF8Stateful:PyObject*::+1: +PyUnicode_DecodeUTF8Stateful:const char*:s:: +PyUnicode_DecodeUTF8Stateful:int:size:: +PyUnicode_DecodeUTF8Stateful:const char*:errors:: +PyUnicode_DecodeUTF8Stateful:int*:consumed:: + PyUnicode_Encode:PyObject*::+1: PyUnicode_Encode:const Py_UNICODE*:s:: PyUnicode_Encode:int:size:: @@ -1513,6 +1630,12 @@ Py_CompileString:char*:str:: Py_CompileString:char*:filename:: Py_CompileString:int:start:: +Py_CompileStringFlags:PyObject*::+1: +Py_CompileStringFlags:char*:str:: +Py_CompileStringFlags:char*:filename:: +Py_CompileStringFlags:int:start:: +Py_CompileStringFlags:PyCompilerFlags*:flags:: + Py_DECREF:void::: Py_DECREF:PyObject*:o:-1: diff --git a/Doc/commontex/boilerplate.tex b/Doc/commontex/boilerplate.tex index b4c9f48..55a4184 100644 --- a/Doc/commontex/boilerplate.tex +++ b/Doc/commontex/boilerplate.tex @@ -5,5 +5,5 @@ Email: \email{docs@python.org} } -\date{\today} % XXX update before final release! +\date{5th April 2006} % XXX update before final release! \input{patchlevel} % include Python version information diff --git a/Doc/commontex/license.tex b/Doc/commontex/license.tex index 525ce8a..d1554c2 100644 --- a/Doc/commontex/license.tex +++ b/Doc/commontex/license.tex @@ -49,6 +49,8 @@ GPL-compatible; the table below summarizes the various releases. \linev{2.4}{2.3}{2004}{PSF}{yes} \linev{2.4.1}{2.4}{2005}{PSF}{yes} \linev{2.4.2}{2.4.1}{2005}{PSF}{yes} + \linev{2.4.3}{2.4.2}{2006}{PSF}{yes} + \linev{2.5}{2.4}{2006}{PSF}{yes} \end{tablev} \note{GPL-compatible doesn't mean that we're distributing @@ -430,26 +432,49 @@ The source for the \module{fpectl} module includes the following notice: The source code for the \module{md5} module contains the following notice: \begin{verbatim} -Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All -rights reserved. - -License to copy and use this software is granted provided that it -is identified as the "RSA Data Security, Inc. MD5 Message-Digest -Algorithm" in all material mentioning or referencing this software -or this function. - -License is also granted to make and use derivative works provided -that such works are identified as "derived from the RSA Data -Security, Inc. MD5 Message-Digest Algorithm" in all material -mentioning or referencing the derived work. - -RSA Data Security, Inc. makes no representations concerning either -the merchantability of this software or the suitability of this -software for any particular purpose. It is provided "as is" -without express or implied warranty of any kind. - -These notices must be retained in any copies of any part of this -documentation and/or software. + Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + L. Peter Deutsch + ghost@aladdin.com + + Independent implementation of MD5 (RFC 1321). + + This code implements the MD5 Algorithm defined in RFC 1321, whose + text is available at + http://www.ietf.org/rfc/rfc1321.txt + The code is derived from the text of the RFC, including the test suite + (section A.5) but excluding the rest of Appendix A. It does not include + any code or documentation that is identified in the RFC as being + copyrighted. + + The original and principal author of md5.h is L. Peter Deutsch + . Other authors are noted in the change history + that follows (in reverse chronological order): + + 2002-04-13 lpd Removed support for non-ANSI compilers; removed + references to Ghostscript; clarified derivation from RFC 1321; + now handles byte order either statically or dynamically. + 1999-11-04 lpd Edited comments slightly for automatic TOC extraction. + 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5); + added conditionalization for C++ compilation from Martin + Purschke . + 1999-05-03 lpd Original version. \end{verbatim} diff --git a/Doc/dist/dist.tex b/Doc/dist/dist.tex index d6ddad8..3ba51d0 100644 --- a/Doc/dist/dist.tex +++ b/Doc/dist/dist.tex @@ -1467,7 +1467,7 @@ script as follows: \lineii{\%description (section)}{\option{long\_description}} \end{tableii} -Additionally, there many options in \file{.spec} files that don't have +Additionally, there are many options in \file{.spec} files that don't have corresponding options in the setup script. Most of these are handled through options to the \command{bdist\_rpm} command as follows: @@ -1737,6 +1737,8 @@ password: \chapter{Uploading Packages to the Package Index} \label{package-upload} +\versionadded{2.5} + The Python Package Index (PyPI) not only stores the package info, but also the package data if the author of the package wishes to. The distutils command \command{upload} pushes the distribution files to PyPI. @@ -1754,8 +1756,21 @@ built using an earlier invocation of \file{setup.py}, but that only distributions named on the command line for the invocation including the \command{upload} command are uploaded. -The \command{upload} command uses the username and password stored in -the file \file{\$HOME/.pypirc}, see section~\ref{pypirc}. +The \command{upload} command uses the username, password, and repository +URL from the \file{\$HOME/.pypirc} file (see section~\ref{pypirc} for +more on this file). + +You can use the \programopt{--sign} option to tell \command{upload} to +sign each uploaded file using GPG (GNU Privacy Guard). The +\program{gpg} program must be available for execution on the system +\envvar{PATH}. You can also specify which key to use for signing +using the \programopt{--identity=\var{name}} option. + +Other \command{upload} options include +\programopt{--repository=\var{url}} (which lets you override the +repository setting from \file{\$HOME/.pypirc}), and +\programopt{--show-response} (which displays the full response text +from the PyPI server for help in debugging upload problems). \chapter{Examples} \label{examples} diff --git a/Doc/howto/regex.tex b/Doc/howto/regex.tex index 87fdad2..f9867ae 100644 --- a/Doc/howto/regex.tex +++ b/Doc/howto/regex.tex @@ -33,11 +33,8 @@ This document is available from The \module{re} module was added in Python 1.5, and provides Perl-style regular expression patterns. Earlier versions of Python -came with the \module{regex} module, which provides Emacs-style -patterns. Emacs-style patterns are slightly less readable and -don't provide as many features, so there's not much reason to use -the \module{regex} module when writing new code, though you might -encounter old code that uses it. +came with the \module{regex} module, which provided Emacs-style +patterns. \module{regex} module was removed in Python 2.5. Regular expressions (or REs) are essentially a tiny, highly specialized programming language embedded inside Python and made @@ -1458,7 +1455,7 @@ Jeffrey Friedl's \citetitle{Mastering Regular Expressions}, published by O'Reilly. Unfortunately, it exclusively concentrates on Perl and Java's flavours of regular expressions, and doesn't contain any Python material at all, so it won't be useful as a reference for programming -in Python. (The first edition covered Python's now-obsolete +in Python. (The first edition covered Python's now-removed \module{regex} module, which won't help you much.) Consider checking it out from your library. diff --git a/Doc/lib/compiler.tex b/Doc/lib/compiler.tex index e619a9a..f0926e7 100644 --- a/Doc/lib/compiler.tex +++ b/Doc/lib/compiler.tex @@ -40,9 +40,9 @@ modules contained in the package. \begin{funcdesc}{parse}{buf} Returns an abstract syntax tree for the Python source code in \var{buf}. -The function raises SyntaxError if there is an error in the source -code. The return value is a \class{compiler.ast.Module} instance that -contains the tree. +The function raises \exception{SyntaxError} if there is an error in the +source code. The return value is a \class{compiler.ast.Module} instance +that contains the tree. \end{funcdesc} \begin{funcdesc}{parseFile}{path} diff --git a/Doc/lib/email-dir.py b/Doc/lib/email-dir.py index 2d89a2f..c04f57d 100644 --- a/Doc/lib/email-dir.py +++ b/Doc/lib/email-dir.py @@ -1,83 +1,69 @@ #!/usr/bin/env python -"""Send the contents of a directory as a MIME message. +"""Send the contents of a directory as a MIME message.""" -Usage: dirmail [options] from to [to ...]* - -Options: - -h / --help - Print this message and exit. - - -d directory - --directory=directory - Mail the contents of the specified directory, otherwise use the - current directory. Only the regular files in the directory are sent, - and we don't recurse to subdirectories. - -`from' is the email address of the sender of the message. - -`to' is the email address of the recipient of the message, and multiple -recipients may be given. - -The email is sent by forwarding to your local SMTP server, which then does the -normal delivery process. Your local machine must be running an SMTP server. -""" - -import sys import os -import getopt +import sys import smtplib # For guessing MIME type based on file name extension import mimetypes -from email import Encoders -from email.Message import Message -from email.MIMEAudio import MIMEAudio -from email.MIMEBase import MIMEBase -from email.MIMEMultipart import MIMEMultipart -from email.MIMEImage import MIMEImage -from email.MIMEText import MIMEText - -COMMASPACE = ', ' +from optparse import OptionParser +from email import encoders +from email.message import Message +from email.mime.audio import MIMEAudio +from email.mime.base import MIMEBase +from email.mime.image import MIMEImage +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText -def usage(code, msg=''): - print >> sys.stderr, __doc__ - if msg: - print >> sys.stderr, msg - sys.exit(code) +COMMASPACE = ', ' def main(): - try: - opts, args = getopt.getopt(sys.argv[1:], 'hd:', ['help', 'directory=']) - except getopt.error, msg: - usage(1, msg) - - dir = os.curdir - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-d', '--directory'): - dir = arg - - if len(args) < 2: - usage(1) - - sender = args[0] - recips = args[1:] - + parser = OptionParser(usage="""\ +Send the contents of a directory as a MIME message. + +Usage: %prog [options] + +Unless the -o option is given, the email is sent by forwarding to your local +SMTP server, which then does the normal delivery process. Your local machine +must be running an SMTP server. +""") + parser.add_option('-d', '--directory', + type='string', action='store', + help="""Mail the contents of the specified directory, + otherwise use the current directory. Only the regular + files in the directory are sent, and we don't recurse to + subdirectories.""") + parser.add_option('-o', '--output', + type='string', action='store', metavar='FILE', + help="""Print the composed message to FILE instead of + sending the message to the SMTP server.""") + parser.add_option('-s', '--sender', + type='string', action='store', metavar='SENDER', + help='The value of the From: header (required)') + parser.add_option('-r', '--recipient', + type='string', action='append', metavar='RECIPIENT', + default=[], dest='recipients', + help='A To: header value (at least one required)') + opts, args = parser.parse_args() + if not opts.sender or not opts.recipients: + parser.print_help() + sys.exit(1) + directory = opts.directory + if not directory: + directory = '.' # Create the enclosing (outer) message outer = MIMEMultipart() - outer['Subject'] = 'Contents of directory %s' % os.path.abspath(dir) - outer['To'] = COMMASPACE.join(recips) - outer['From'] = sender + outer['Subject'] = 'Contents of directory %s' % os.path.abspath(directory) + outer['To'] = COMMASPACE.join(opts.recipients) + outer['From'] = opts.sender outer.preamble = 'You will not see this in a MIME-aware mail reader.\n' - # To guarantee the message ends with a newline - outer.epilogue = '' - for filename in os.listdir(dir): - path = os.path.join(dir, filename) + for filename in os.listdir(directory): + path = os.path.join(directory, filename) if not os.path.isfile(path): continue # Guess the content type based on the file's extension. Encoding @@ -108,16 +94,21 @@ def main(): msg.set_payload(fp.read()) fp.close() # Encode the payload using Base64 - Encoders.encode_base64(msg) + encoders.encode_base64(msg) # Set the filename parameter msg.add_header('Content-Disposition', 'attachment', filename=filename) outer.attach(msg) - - # Now send the message - s = smtplib.SMTP() - s.connect() - s.sendmail(sender, recips, outer.as_string()) - s.close() + # Now send or store the message + composed = outer.as_string() + if opts.output: + fp = open(opts.output, 'w') + fp.write(composed) + fp.close() + else: + s = smtplib.SMTP() + s.connect() + s.sendmail(opts.sender, opts.recipients, composed) + s.close() if __name__ == '__main__': diff --git a/Doc/lib/email-mime.py b/Doc/lib/email-mime.py index 048a59f..5097253 100644 --- a/Doc/lib/email-mime.py +++ b/Doc/lib/email-mime.py @@ -2,8 +2,8 @@ import smtplib # Here are the email package modules we'll need -from email.MIMEImage import MIMEImage -from email.MIMEMultipart import MIMEMultipart +from email.mime.image import MIMEImage +from email.mime.multipart import MIMEMultipart COMMASPACE = ', ' @@ -15,8 +15,6 @@ msg['Subject'] = 'Our family reunion' msg['From'] = me msg['To'] = COMMASPACE.join(family) msg.preamble = 'Our family reunion' -# Guarantees the message ends in a newline -msg.epilogue = '' # Assume we know that the image files are all in PNG format for file in pngfiles: diff --git a/Doc/lib/email-simple.py b/Doc/lib/email-simple.py index a445f1b..44152a4 100644 --- a/Doc/lib/email-simple.py +++ b/Doc/lib/email-simple.py @@ -2,7 +2,7 @@ import smtplib # Import the email modules we'll need -from email.MIMEText import MIMEText +from email.mime.text import MIMEText # Open a plain text file for reading. For this example, assume that # the text file contains only ASCII characters. diff --git a/Doc/lib/email-unpack.py b/Doc/lib/email-unpack.py index b166fdb..fc05d99 100644 --- a/Doc/lib/email-unpack.py +++ b/Doc/lib/email-unpack.py @@ -1,59 +1,44 @@ #!/usr/bin/env python -"""Unpack a MIME message into a directory of files. +"""Unpack a MIME message into a directory of files.""" -Usage: unpackmail [options] msgfile - -Options: - -h / --help - Print this message and exit. - - -d directory - --directory=directory - Unpack the MIME message into the named directory, which will be - created if it doesn't already exist. - -msgfile is the path to the file containing the MIME message. -""" - -import sys import os -import getopt +import sys +import email import errno import mimetypes -import email - -def usage(code, msg=''): - print >> sys.stderr, __doc__ - if msg: - print >> sys.stderr, msg - sys.exit(code) +from optparse import OptionParser def main(): - try: - opts, args = getopt.getopt(sys.argv[1:], 'hd:', ['help', 'directory=']) - except getopt.error, msg: - usage(1, msg) - - dir = os.curdir - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-d', '--directory'): - dir = arg + parser = OptionParser(usage="""\ +Unpack a MIME message into a directory of files. + +Usage: %prog [options] msgfile +""") + parser.add_option('-d', '--directory', + type='string', action='store', + help="""Unpack the MIME message into the named + directory, which will be created if it doesn't already + exist.""") + opts, args = parser.parse_args() + if not opts.directory: + parser.print_help() + sys.exit(1) try: msgfile = args[0] except IndexError: - usage(1) + parser.print_help() + sys.exit(1) try: - os.mkdir(dir) + os.mkdir(opts.directory) except OSError, e: # Ignore directory exists error - if e.errno <> errno.EEXIST: raise + if e.errno <> errno.EEXIST: + raise fp = open(msgfile) msg = email.message_from_file(fp) @@ -74,8 +59,8 @@ def main(): ext = '.bin' filename = 'part-%03d%s' % (counter, ext) counter += 1 - fp = open(os.path.join(dir, filename), 'wb') - fp.write(part.get_payload(decode=1)) + fp = open(os.path.join(opts.directory, filename), 'wb') + fp.write(part.get_payload(decode=True)) fp.close() diff --git a/Doc/lib/email.tex b/Doc/lib/email.tex index 3a90e22..6853325 100644 --- a/Doc/lib/email.tex +++ b/Doc/lib/email.tex @@ -1,4 +1,4 @@ -% Copyright (C) 2001-2004 Python Software Foundation +% Copyright (C) 2001-2006 Python Software Foundation % Author: barry@python.org (Barry Warsaw) \section{\module{email} --- @@ -18,10 +18,10 @@ subsumes most of the functionality in several older standard modules such as \refmodule{rfc822}, \refmodule{mimetools}, \refmodule{multifile}, and other non-standard packages such as \module{mimecntl}. It is specifically \emph{not} designed to do any -sending of email messages to SMTP (\rfc{2821}) servers; that is the -function of the \refmodule{smtplib} module. The \module{email} -package attempts to be as RFC-compliant as possible, supporting in -addition to \rfc{2822}, such MIME-related RFCs as +sending of email messages to SMTP (\rfc{2821}), NNTP, or other servers; those +are functions of modules such as \refmodule{smtplib} and \refmodule{nntplib}. +The \module{email} package attempts to be as RFC-compliant as possible, +supporting in addition to \rfc{2822}, such MIME-related RFCs as \rfc{2045}, \rfc{2046}, \rfc{2047}, and \rfc{2231}. The primary distinguishing feature of the \module{email} package is @@ -41,7 +41,7 @@ The following sections describe the functionality of the should be common in applications: an email message is read as flat text from a file or other source, the text is parsed to produce the object structure of the email message, this structure is manipulated, -and finally rendered back into flat text. +and finally, the object tree is rendered back into flat text. It is perfectly feasible to create the object structure out of whole cloth --- i.e. completely from scratch. From there, a similar @@ -56,6 +56,7 @@ package, a section on differences and porting is provided. \begin{seealso} \seemodule{smtplib}{SMTP protocol client} + \seemodule{nntplib}{NNTP protocol client} \end{seealso} \subsection{Representing an email message} @@ -88,22 +89,51 @@ package, a section on differences and porting is provided. \subsection{Iterators} \input{emailiter} -\subsection{Package History} +\subsection{Package History\label{email-pkg-history}} -Version 1 of the \module{email} package was bundled with Python -releases up to Python 2.2.1. Version 2 was developed for the Python -2.3 release, and backported to Python 2.2.2. It was also available as -a separate distutils-based package, and is compatible back to Python 2.1. +This table describes the release history of the email package, corresponding +to the version of Python that the package was released with. For purposes of +this document, when you see a note about change or added versions, these refer +to the Python version the change was made it, \emph{not} the email package +version. This table also describes the Python compatibility of each version +of the package. -\module{email} version 3.0 was released with Python 2.4 and as a separate -distutils-based package. It is compatible back to Python 2.3. +\begin{tableiii}{l|l|l}{constant}{email version}{distributed with}{compatible with} +\lineiii{1.x}{Python 2.2.0 to Python 2.2.1}{\emph{no longer supported}} +\lineiii{2.5}{Python 2.2.2+ and Python 2.3}{Python 2.1 to 2.5} +\lineiii{3.0}{Python 2.4}{Python 2.3 to 2.5} +\lineiii{4.0}{Python 2.5}{Python 2.3 to 2.5} +\end{tableiii} -Here are the differences between \module{email} version 3 and version 2: +Here are the major differences between \module{email} verson 4 and version 3: + +\begin{itemize} +\item All modules have been renamed according to \pep{8} standards. For + example, the version 3 module \module{email.Message} was renamed to + \module{email.message} in version 4. + +\item A new subpackage \module{email.mime} was added and all the version 3 + \module{email.MIME*} modules were renamed and situated into the + \module{email.mime} subpackage. For example, the version 3 module + \module{email.MIMEText} was renamed to \module{email.mime.text}. + + \emph{Note that the version 3 names will continue to work until Python + 2.6}. + +\item The \module{email.mime.application} module was added, which contains the + \class{MIMEApplication} class. + +\item Methods that were deprecated in version 3 have been removed. These + include \method{Generator.__call__()}, \method{Message.get_type()}, + \method{Message.get_main_type()}, \method{Message.get_subtype()}. +\end{itemize} + +Here are the major differences between \module{email} version 3 and version 2: \begin{itemize} \item The \class{FeedParser} class was introduced, and the \class{Parser} class was implemented in terms of the \class{FeedParser}. All parsing - there for is non-strict, and parsing will make a best effort never to + therefore is non-strict, and parsing will make a best effort never to raise an exception. Problems found while parsing messages are stored in the message's \var{defect} attribute. @@ -117,7 +147,7 @@ Here are the differences between \module{email} version 3 and version 2: \method{Generator.__call__()}, \method{Message.get_type()}, \method{Message.get_main_type()}, \method{Message.get_subtype()}, and the \var{strict} argument to the \class{Parser} class. These are - expected to be removed in email 3.1. + expected to be removed in future versions. \item Support for Pythons earlier than 2.3 has been removed. \end{itemize} @@ -278,12 +308,12 @@ The \class{Message} class has the following differences: \item The method \method{getpayloadastext()} was removed. Similar functionality is supported by the \class{DecodedGenerator} class in the - \refmodule{email.Generator} module. + \refmodule{email.generator} module. \item The method \method{getbodyastext()} was removed. You can get similar functionality by creating an iterator with \function{typed_subpart_iterator()} in the - \refmodule{email.Iterators} module. + \refmodule{email.iterators} module. \end{itemize} The \class{Parser} class has no differences in its public interface. @@ -295,7 +325,7 @@ notification\footnote{Delivery Status Notifications (DSN) are defined in \rfc{1894}.}. The \class{Generator} class has no differences in its public -interface. There is a new class in the \refmodule{email.Generator} +interface. There is a new class in the \refmodule{email.generator} module though, called \class{DecodedGenerator} which provides most of the functionality previously available in the \method{Message.getpayloadastext()} method. @@ -329,11 +359,11 @@ The following modules and classes have been changed: \module{mimelib} provided some utility functions in its \module{address} and \module{date} modules. All of these functions -have been moved to the \refmodule{email.Utils} module. +have been moved to the \refmodule{email.utils} module. The \code{MsgReader} class/module has been removed. Its functionality is most closely supported in the \function{body_line_iterator()} -function in the \refmodule{email.Iterators} module. +function in the \refmodule{email.iterators} module. \subsection{Examples} diff --git a/Doc/lib/emailcharsets.tex b/Doc/lib/emailcharsets.tex index 18f2a01..e0be68a 100644 --- a/Doc/lib/emailcharsets.tex +++ b/Doc/lib/emailcharsets.tex @@ -1,4 +1,4 @@ -\declaremodule{standard}{email.Charset} +\declaremodule{standard}{email.charset} \modulesynopsis{Character Sets} This module provides a class \class{Charset} for representing @@ -7,6 +7,8 @@ well as a character set registry and several convenience methods for manipulating this registry. Instances of \class{Charset} are used in several other modules within the \module{email} package. +Import this class from the \module{email.charset} module. + \versionadded{2.2.2} \begin{classdesc}{Charset}{\optional{input_charset}} @@ -153,7 +155,7 @@ input charset to the output charset automatically. This is not useful for multibyte character sets, which have line length issues (multibyte characters must be split on a character, not a byte boundary); use the higher-level \class{Header} class to deal with these issues (see -\refmodule{email.Header}). \var{convert} defaults to \code{False}. +\refmodule{email.header}). \var{convert} defaults to \code{False}. The type of encoding (base64 or quoted-printable) will be based on the \var{header_encoding} attribute. @@ -188,7 +190,7 @@ This method allows you to compare two \class{Charset} instances for equality. This method allows you to compare two \class{Charset} instances for inequality. \end{methoddesc} -The \module{email.Charset} module also provides the following +The \module{email.charset} module also provides the following functions for adding new entries to the global character set, alias, and codec registries: diff --git a/Doc/lib/emailencoders.tex b/Doc/lib/emailencoders.tex index a49e04d..3d05c2a 100644 --- a/Doc/lib/emailencoders.tex +++ b/Doc/lib/emailencoders.tex @@ -1,4 +1,4 @@ -\declaremodule{standard}{email.Encoders} +\declaremodule{standard}{email.encoders} \modulesynopsis{Encoders for email message payloads.} When creating \class{Message} objects from scratch, you often need to @@ -7,7 +7,7 @@ This is especially true for \mimetype{image/*} and \mimetype{text/*} type messages containing binary data. The \module{email} package provides some convenient encodings in its -\module{Encoders} module. These encoders are actually used by the +\module{encoders} module. These encoders are actually used by the \class{MIMEAudio} and \class{MIMEImage} class constructors to provide default encodings. All encoder functions take exactly one argument, the message object to encode. They usually extract the payload, encode it, and reset the diff --git a/Doc/lib/emailexc.tex b/Doc/lib/emailexc.tex index 6ac0889..3cef1d5 100644 --- a/Doc/lib/emailexc.tex +++ b/Doc/lib/emailexc.tex @@ -1,8 +1,8 @@ -\declaremodule{standard}{email.Errors} +\declaremodule{standard}{email.errors} \modulesynopsis{The exception classes used by the email package.} The following exception classes are defined in the -\module{email.Errors} module: +\module{email.errors} module: \begin{excclassdesc}{MessageError}{} This is the base class for all exceptions that the \module{email} @@ -59,7 +59,7 @@ problem was found, so for example, if a message nested inside a \mimetype{multipart/alternative} had a malformed header, that nested message object would have a defect, but the containing messages would not. -All defect classes are subclassed from \class{email.Errors.MessageDefect}, but +All defect classes are subclassed from \class{email.errors.MessageDefect}, but this class is \emph{not} an exception! \versionadded[All the defect classes were added]{2.4} diff --git a/Doc/lib/emailgenerator.tex b/Doc/lib/emailgenerator.tex index 330abc0..3415442 100644 --- a/Doc/lib/emailgenerator.tex +++ b/Doc/lib/emailgenerator.tex @@ -1,4 +1,4 @@ -\declaremodule{standard}{email.Generator} +\declaremodule{standard}{email.generator} \modulesynopsis{Generate flat text email messages from a message structure.} One of the most common tasks is to generate the flat text of the email @@ -8,7 +8,7 @@ module or the \refmodule{nntplib} module, or print the message on the console. Taking a message object structure and producing a flat text document is the job of the \class{Generator} class. -Again, as with the \refmodule{email.Parser} module, you aren't limited +Again, as with the \refmodule{email.parser} module, you aren't limited to the functionality of the bundled generator; you could write one from scratch yourself. However the bundled generator knows how to generate most email in a standards-compliant way, should handle MIME @@ -17,7 +17,8 @@ transformation from flat text, to a message structure via the \class{Parser} class, and back to flat text, is idempotent (the input is identical to the output). -Here are the public methods of the \class{Generator} class: +Here are the public methods of the \class{Generator} class, imported from the +\module{email.generator} module: \begin{classdesc}{Generator}{outfp\optional{, mangle_from_\optional{, maxheaderlen}}} @@ -40,7 +41,7 @@ mailbox format files. Optional \var{maxheaderlen} specifies the longest length for a non-continued header. When a header line is longer than \var{maxheaderlen} (in characters, with tabs expanded to 8 spaces), -the header will be split as defined in the \module{email.Header} +the header will be split as defined in the \module{email.header.Header} class. Set to zero to disable header wrapping. The default is 78, as recommended (but not required) by \rfc{2822}. \end{classdesc} @@ -81,9 +82,9 @@ be used in extended print statements. As a convenience, see the methods \method{Message.as_string()} and \code{str(aMessage)}, a.k.a. \method{Message.__str__()}, which simplify the generation of a formatted string representation of a -message object. For more detail, see \refmodule{email.Message}. +message object. For more detail, see \refmodule{email.message}. -The \module{email.Generator} module also provides a derived class, +The \module{email.generator} module also provides a derived class, called \class{DecodedGenerator} which is like the \class{Generator} base class, except that non-\mimetype{text} parts are substituted with a format string representing the part. @@ -128,13 +129,5 @@ The default value for \var{fmt} is \code{None}, meaning \versionadded{2.2.2} \end{classdesc} -\subsubsection{Deprecated methods} - -The following methods are deprecated in \module{email} version 2. -They are documented here for completeness. - -\begin{methoddesc}[Generator]{__call__}{msg\optional{, unixfrom}} -This method is identical to the \method{flatten()} method. - -\deprecated{2.2.2}{Use the \method{flatten()} method instead.} -\end{methoddesc} +\versionchanged[The previously deprecated method \method{__call__()} was +removed]{2.5} diff --git a/Doc/lib/emailheaders.tex b/Doc/lib/emailheaders.tex index 2795644b..524d08c 100644 --- a/Doc/lib/emailheaders.tex +++ b/Doc/lib/emailheaders.tex @@ -1,4 +1,4 @@ -\declaremodule{standard}{email.Header} +\declaremodule{standard}{email.header} \modulesynopsis{Representing non-ASCII headers} \rfc{2822} is the base standard that describes the format of email @@ -15,17 +15,18 @@ slew of RFCs have been written describing how to encode email containing non-\ASCII{} characters into \rfc{2822}-compliant format. These RFCs include \rfc{2045}, \rfc{2046}, \rfc{2047}, and \rfc{2231}. The \module{email} package supports these standards in its -\module{email.Header} and \module{email.Charset} modules. +\module{email.header} and \module{email.charset} modules. If you want to include non-\ASCII{} characters in your email headers, say in the \mailheader{Subject} or \mailheader{To} fields, you should use the \class{Header} class and assign the field in the \class{Message} object to an instance of \class{Header} instead of -using a string for the header value. For example: +using a string for the header value. Import the \class{Header} class from the +\module{email.header} module. For example: \begin{verbatim} ->>> from email.Message import Message ->>> from email.Header import Header +>>> from email.message import Message +>>> from email.header import Header >>> msg = Message() >>> h = Header('p\xf6stal', 'iso-8859-1') >>> msg['Subject'] = h @@ -87,7 +88,7 @@ Optional \var{errors} is passed straight through to the Append the string \var{s} to the MIME header. Optional \var{charset}, if given, should be a \class{Charset} instance -(see \refmodule{email.Charset}) or the name of a character set, which +(see \refmodule{email.charset}) or the name of a character set, which will be converted to a \class{Charset} instance. A value of \code{None} (the default) means that the \var{charset} given in the constructor is used. @@ -139,7 +140,7 @@ This method allows you to compare two \class{Header} instances for equality. This method allows you to compare two \class{Header} instances for inequality. \end{methoddesc} -The \module{email.Header} module also provides the following +The \module{email.header} module also provides the following convenient functions. \begin{funcdesc}{decode_header}{header} @@ -155,7 +156,7 @@ encoded string. Here's an example: \begin{verbatim} ->>> from email.Header import decode_header +>>> from email.header import decode_header >>> decode_header('=?iso-8859-1?q?p=F6stal?=') [('p\xf6stal', 'iso-8859-1')] \end{verbatim} diff --git a/Doc/lib/emailiter.tex b/Doc/lib/emailiter.tex index d1a8f98..ef8ef6f 100644 --- a/Doc/lib/emailiter.tex +++ b/Doc/lib/emailiter.tex @@ -1,8 +1,8 @@ -\declaremodule{standard}{email.Iterators} +\declaremodule{standard}{email.iterators} \modulesynopsis{Iterate over a message object tree.} Iterating over a message object tree is fairly easy with the -\method{Message.walk()} method. The \module{email.Iterators} module +\method{Message.walk()} method. The \module{email.iterators} module provides some useful higher level iterations over message object trees. diff --git a/Doc/lib/emailmessage.tex b/Doc/lib/emailmessage.tex index 9b41852..7bd7dd8 100644 --- a/Doc/lib/emailmessage.tex +++ b/Doc/lib/emailmessage.tex @@ -1,10 +1,11 @@ -\declaremodule{standard}{email.Message} +\declaremodule{standard}{email.message} \modulesynopsis{The base class representing email messages.} The central class in the \module{email} package is the -\class{Message} class; it is the base class for the \module{email} -object model. \class{Message} provides the core functionality for -setting and querying header fields, and for accessing message bodies. +\class{Message} class, imported from the \module{email.message} module. It is +the base class for the \module{email} object model. \class{Message} provides +the core functionality for setting and querying header fields, and for +accessing message bodies. Conceptually, a \class{Message} object consists of \emph{headers} and \emph{payloads}. Headers are \rfc{2822} style field names and @@ -45,7 +46,7 @@ begin with \code{From }. For more flexibility, instantiate a \begin{verbatim} from cStringIO import StringIO -from email.Generator import Generator +from email.generator import Generator fp = StringIO() g = Generator(fp, mangle_from_=False, maxheaderlen=60) g.flatten(msg) @@ -119,7 +120,7 @@ client's responsibility to ensure the payload invariants. Optional \begin{methoddesc}[Message]{set_charset}{charset} Set the character set of the payload to \var{charset}, which can -either be a \class{Charset} instance (see \refmodule{email.Charset}), a +either be a \class{Charset} instance (see \refmodule{email.charset}), a string naming a character set, or \code{None}. If it is a string, it will be converted to a \class{Charset} instance. If \var{charset} is \code{None}, the @@ -128,8 +129,8 @@ or \code{None}. If it is a string, it will be converted to a \exception{TypeError}. The message will be assumed to be of type \mimetype{text/*} encoded with -\code{charset.input_charset}. It will be converted to -\code{charset.output_charset} +\var{charset.input_charset}. It will be converted to +\var{charset.output_charset} and encoded properly, if needed, when generating the plain text representation of the message. MIME headers (\mailheader{MIME-Version}, \mailheader{Content-Type}, @@ -513,6 +514,9 @@ message/rfc822 \end{verbatim} \end{methoddesc} +\versionchanged[The previously deprecated methods \method{get_type()}, +\method{get_main_type()}, and \method{get_subtype()} were removed]{2.5} + \class{Message} objects can also optionally contain two instance attributes, which can be used when generating the plain text of a MIME message. @@ -532,7 +536,7 @@ to the message's \var{preamble} attribute. When the \class{Generator} is writing out the plain text representation of a MIME message, and it finds the message has a \var{preamble} attribute, it will write this text in the area between the headers and the first boundary. See -\refmodule{email.Parser} and \refmodule{email.Generator} for details. +\refmodule{email.parser} and \refmodule{email.generator} for details. Note that if the message object has no preamble, the \var{preamble} attribute will be \code{None}. @@ -543,58 +547,15 @@ The \var{epilogue} attribute acts the same way as the \var{preamble} attribute, except that it contains text that appears between the last boundary and the end of the message. -One note: when generating the flat text for a \mimetype{multipart} -message that has no \var{epilogue} (using the standard -\class{Generator} class), no newline is added after the closing -boundary line. If the message object has an \var{epilogue} and its -value does not start with a newline, a newline is printed after the -closing boundary. This seems a little clumsy, but it makes the most -practical sense. The upshot is that if you want to ensure that a -newline get printed after your closing \mimetype{multipart} boundary, -set the \var{epilogue} to the empty string. +\versionchanged[You do not need to set the epilogue to the empty string in +order for the \class{Generator} to print a newline at the end of the +file]{2.5} \end{datadesc} \begin{datadesc}{defects} The \var{defects} attribute contains a list of all the problems found when -parsing this message. See \refmodule{email.Errors} for a detailed description +parsing this message. See \refmodule{email.errors} for a detailed description of the possible parsing defects. \versionadded{2.4} \end{datadesc} - -\subsubsection{Deprecated methods} - -\versionchanged[The \method{add_payload()} method was removed; use the -\method{attach()} method instead]{2.4} - -The following methods are deprecated. They are documented here for -completeness. - -\begin{methoddesc}[Message]{get_type}{\optional{failobj}} -Return the message's content type, as a string of the form -\mimetype{maintype/subtype} as taken from the -\mailheader{Content-Type} header. -The returned string is coerced to lowercase. - -If there is no \mailheader{Content-Type} header in the message, -\var{failobj} is returned (defaults to \code{None}). - -\deprecated{2.2.2}{Use the \method{get_content_type()} method instead.} -\end{methoddesc} - -\begin{methoddesc}[Message]{get_main_type}{\optional{failobj}} -Return the message's \emph{main} content type. This essentially returns the -\var{maintype} part of the string returned by \method{get_type()}, with the -same semantics for \var{failobj}. - -\deprecated{2.2.2}{Use the \method{get_content_maintype()} method instead.} -\end{methoddesc} - -\begin{methoddesc}[Message]{get_subtype}{\optional{failobj}} -Return the message's sub-content type. This essentially returns the -\var{subtype} part of the string returned by \method{get_type()}, with the -same semantics for \var{failobj}. - -\deprecated{2.2.2}{Use the \method{get_content_subtype()} method instead.} -\end{methoddesc} - diff --git a/Doc/lib/emailmimebase.tex b/Doc/lib/emailmimebase.tex index 070c9a2..4735be3 100644 --- a/Doc/lib/emailmimebase.tex +++ b/Doc/lib/emailmimebase.tex @@ -1,3 +1,11 @@ +\declaremodule{standard}{email.mime} +\declaremodule{standard}{email.mime.base} +\declaremodule{standard}{email.mime.nonmultipart} +\declaremodule{standard}{email.mime.multipart} +\declaremodule{standard}{email.mime.audio} +\declaremodule{standard}{email.mime.image} +\declaremodule{standard}{email.mime.message} +\declaremodule{standard}{email.mime.text} Ordinarily, you get a message object structure by passing a file or some text to a parser, which parses the text and returns the root message object. However you can also build a complete message @@ -6,26 +14,16 @@ hand. In fact, you can also take an existing structure and add new \class{Message} objects, move them around, etc. This makes a very convenient interface for slicing-and-dicing MIME messages. -You can create a new object structure by creating \class{Message} -instances, adding attachments and all the appropriate headers manually. -For MIME messages though, the \module{email} package provides some -convenient subclasses to make things easier. Each of these classes -should be imported from a module with the same name as the class, from -within the \module{email} package. E.g.: - -\begin{verbatim} -import email.MIMEImage.MIMEImage -\end{verbatim} - -or - -\begin{verbatim} -from email.MIMEText import MIMEText -\end{verbatim} +You can create a new object structure by creating \class{Message} instances, +adding attachments and all the appropriate headers manually. For MIME +messages though, the \module{email} package provides some convenient +subclasses to make things easier. Here are the classes: \begin{classdesc}{MIMEBase}{_maintype, _subtype, **_params} +Module: \module{email.mime.base} + This is the base class for all the MIME-specific subclasses of \class{Message}. Ordinarily you won't create instances specifically of \class{MIMEBase}, although you could. \class{MIMEBase} is provided @@ -45,6 +43,8 @@ The \class{MIMEBase} class always adds a \mailheader{Content-Type} header \end{classdesc} \begin{classdesc}{MIMENonMultipart}{} +Module: \module{email.mime.nonmultipart} + A subclass of \class{MIMEBase}, this is an intermediate base class for MIME messages that are not \mimetype{multipart}. The primary purpose of this class is to prevent the use of the \method{attach()} method, @@ -57,6 +57,7 @@ exception is raised. \begin{classdesc}{MIMEMultipart}{\optional{subtype\optional{, boundary\optional{, _subparts\optional{, _params}}}}} +Module: \module{email.mime.multipart} A subclass of \class{MIMEBase}, this is an intermediate base class for MIME messages that are \mimetype{multipart}. Optional \var{_subtype} @@ -80,8 +81,31 @@ argument, which is a keyword dictionary. \versionadded{2.2.2} \end{classdesc} +\begin{classdesc}{MIMEApplication}{_data\optional{, _subtype\optional{, + _encoder\optional{, **_params}}}} +Module: \module{email.mime.application} + +A subclass of \class{MIMENonMultipart}, the \class{MIMEApplication} class is +used to represent MIME message objects of major type \mimetype{application}. +\var{_data} is a string containing the raw byte data. Optional \var{_subtype} +specifies the MIME subtype and defaults to \mimetype{octet-stream}. + +Optional \var{_encoder} is a callable (i.e. function) which will +perform the actual encoding of the data for transport. This +callable takes one argument, which is the \class{MIMEApplication} instance. +It should use \method{get_payload()} and \method{set_payload()} to +change the payload to encoded form. It should also add any +\mailheader{Content-Transfer-Encoding} or other headers to the message +object as necessary. The default encoding is base64. See the +\refmodule{email.encoders} module for a list of the built-in encoders. + +\var{_params} are passed straight through to the base class constructor. +\versionadded{2.5} +\end{classdesc} + \begin{classdesc}{MIMEAudio}{_audiodata\optional{, _subtype\optional{, _encoder\optional{, **_params}}}} +Module: \module{email.mime.audio} A subclass of \class{MIMENonMultipart}, the \class{MIMEAudio} class is used to create MIME message objects of major type \mimetype{audio}. @@ -100,13 +124,14 @@ It should use \method{get_payload()} and \method{set_payload()} to change the payload to encoded form. It should also add any \mailheader{Content-Transfer-Encoding} or other headers to the message object as necessary. The default encoding is base64. See the -\refmodule{email.Encoders} module for a list of the built-in encoders. +\refmodule{email.encoders} module for a list of the built-in encoders. \var{_params} are passed straight through to the base class constructor. \end{classdesc} \begin{classdesc}{MIMEImage}{_imagedata\optional{, _subtype\optional{, _encoder\optional{, **_params}}}} +Module: \module{email.mime.image} A subclass of \class{MIMENonMultipart}, the \class{MIMEImage} class is used to create MIME message objects of major type \mimetype{image}. @@ -125,13 +150,15 @@ It should use \method{get_payload()} and \method{set_payload()} to change the payload to encoded form. It should also add any \mailheader{Content-Transfer-Encoding} or other headers to the message object as necessary. The default encoding is base64. See the -\refmodule{email.Encoders} module for a list of the built-in encoders. +\refmodule{email.encoders} module for a list of the built-in encoders. \var{_params} are passed straight through to the \class{MIMEBase} constructor. \end{classdesc} \begin{classdesc}{MIMEMessage}{_msg\optional{, _subtype}} +Module: \module{email.mime.message} + A subclass of \class{MIMENonMultipart}, the \class{MIMEMessage} class is used to create MIME objects of main type \mimetype{message}. \var{_msg} is used as the payload, and must be an instance of class @@ -143,6 +170,8 @@ to \mimetype{rfc822}. \end{classdesc} \begin{classdesc}{MIMEText}{_text\optional{, _subtype\optional{, _charset}}} +Module: \module{email.mime.text} + A subclass of \class{MIMENonMultipart}, the \class{MIMEText} class is used to create MIME objects of major type \mimetype{text}. \var{_text} is the string for the payload. \var{_subtype} is the diff --git a/Doc/lib/emailparser.tex b/Doc/lib/emailparser.tex index 5fac92f..609fa40 100644 --- a/Doc/lib/emailparser.tex +++ b/Doc/lib/emailparser.tex @@ -1,4 +1,4 @@ -\declaremodule{standard}{email.Parser} +\declaremodule{standard}{email.parser} \modulesynopsis{Parse flat text email messages to produce a message object structure.} @@ -41,9 +41,10 @@ message object trees any way it finds necessary. \versionadded{2.4} -The \class{FeedParser} provides an API that is conducive to incremental -parsing of email messages, such as would be necessary when reading the text of -an email message from a source that can block (e.g. a socket). The +The \class{FeedParser}, imported from the \module{email.feedparser} module, +provides an API that is conducive to incremental parsing of email messages, +such as would be necessary when reading the text of an email message from a +source that can block (e.g. a socket). The \class{FeedParser} can of course be used to parse an email message fully contained in a string or a file, but the classic \class{Parser} API may be more convenient for such use cases. The semantics and results of the two @@ -56,14 +57,14 @@ accurate when parsing standards-compliant messages, and it does a very good job of parsing non-compliant messages, providing information about how a message was deemed broken. It will populate a message object's \var{defects} attribute with a list of any problems it found in a message. See the -\refmodule{email.Errors} module for the list of defects that it can find. +\refmodule{email.errors} module for the list of defects that it can find. Here is the API for the \class{FeedParser}: \begin{classdesc}{FeedParser}{\optional{_factory}} Create a \class{FeedParser} instance. Optional \var{_factory} is a no-argument callable that will be called whenever a new message object is -needed. It defaults to the \class{email.Message.Message} class. +needed. It defaults to the \class{email.message.Message} class. \end{classdesc} \begin{methoddesc}[FeedParser]{feed}{data} @@ -82,21 +83,22 @@ more data to a closed \class{FeedParser}. \subsubsection{Parser class API} -The \class{Parser} provides an API that can be used to parse a message when -the complete contents of the message are available in a string or file. The -\module{email.Parser} module also provides a second class, called +The \class{Parser} class, imported from the \module{email.parser} module, +provides an API that can be used to parse a message when the complete contents +of the message are available in a string or file. The +\module{email.parser} module also provides a second class, called \class{HeaderParser} which can be used if you're only interested in the headers of the message. \class{HeaderParser} can be much faster in these situations, since it does not attempt to parse the message body, instead setting the payload to the raw body as a string. \class{HeaderParser} has the same API as the \class{Parser} class. -\begin{classdesc}{Parser}{\optional{_class\optional{, strict}}} +\begin{classdesc}{Parser}{\optional{_class}} The constructor for the \class{Parser} class takes an optional argument \var{_class}. This must be a callable factory (such as a function or a class), and it is used whenever a sub-message object needs to be created. It defaults to \class{Message} (see -\refmodule{email.Message}). The factory will be called without +\refmodule{email.message}). The factory will be called without arguments. The optional \var{strict} flag is ignored. \deprecated{2.4}{Because the @@ -201,6 +203,6 @@ Here are some notes on the parsing semantics: \method{is_multipart()} method may return \code{False}. If such messages were parsed with the \class{FeedParser}, they will have an instance of the \class{MultipartInvariantViolationDefect} class in their - \var{defects} attribute list. See \refmodule{email.Errors} for + \var{defects} attribute list. See \refmodule{email.errors} for details. \end{itemize} diff --git a/Doc/lib/emailutil.tex b/Doc/lib/emailutil.tex index 491a2b9..fe96473 100644 --- a/Doc/lib/emailutil.tex +++ b/Doc/lib/emailutil.tex @@ -1,7 +1,7 @@ -\declaremodule{standard}{email.Utils} +\declaremodule{standard}{email.utils} \modulesynopsis{Miscellaneous email package utilities.} -There are several useful utilities provided in the \module{email.Utils} +There are several useful utilities provided in the \module{email.utils} module: \begin{funcdesc}{quote}{str} @@ -38,7 +38,7 @@ values as might be returned by \method{Message.get_all()}. Here's a simple example that gets all the recipients of a message: \begin{verbatim} -from email.Utils import getaddresses +from email.utils import getaddresses tos = msg.get_all('to', []) ccs = msg.get_all('cc', []) diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex index fad8fe7..eac35de 100644 --- a/Doc/lib/lib.tex +++ b/Doc/lib/lib.tex @@ -87,7 +87,6 @@ and how to embed it in other applications. \input{libstrings} % String Services \input{libstring} \input{libre} -\input{libreconvert} \input{libstruct} % XXX also/better in File Formats? \input{libdifflib} \input{libstringio} @@ -372,6 +371,7 @@ and how to embed it in other applications. \input{libbltin} % really __builtin__ \input{libmain} % really __main__ \input{libwarnings} +\input{libcontextlib} \input{libatexit} \input{libtraceback} \input{libfuture} % really __future__ @@ -395,6 +395,7 @@ and how to embed it in other applications. \input{libzipimport} \input{libpkgutil} \input{libmodulefinder} +\input{librunpy} % ============= @@ -454,8 +455,6 @@ and how to embed it in other applications. %\input{libcmpcache} %\input{libcmp} %\input{libni} -%\input{libregex} -%\input{libregsub} \chapter{Reporting Bugs} \input{reportingbugs} diff --git a/Doc/lib/libarray.tex b/Doc/lib/libarray.tex index 897310d..eaf5888 100644 --- a/Doc/lib/libarray.tex +++ b/Doc/lib/libarray.tex @@ -139,8 +139,8 @@ file using the \method{fromfile()} method). \end{methoddesc} \begin{methoddesc}[array]{fromunicode}{s} -Extends this array with data from the given unicode string. -The array must be a type 'u' array; otherwise a ValueError +Extends this array with data from the given unicode string. The array +must be a type \code{'u'} array; otherwise a \exception{ValueError} is raised. Use \samp{array.fromstring(ustr.decode(enc))} to append Unicode data to an array of some other type. \end{methoddesc} @@ -197,8 +197,8 @@ be written to a file by the \method{tofile()} method.) \begin{methoddesc}[array]{tounicode}{} Convert the array to a unicode string. The array must be -a type 'u' array; otherwise a ValueError is raised. Use -array.tostring().decode(enc) to obtain a unicode string +a type \code{'u'} array; otherwise a \exception{ValueError} is raised. +Use \samp{array.tostring().decode(enc)} to obtain a unicode string from an array of some other type. \end{methoddesc} diff --git a/Doc/lib/libast.tex b/Doc/lib/libast.tex index b3c3148..b2956ae 100644 --- a/Doc/lib/libast.tex +++ b/Doc/lib/libast.tex @@ -47,11 +47,11 @@ question mark), the value might be \code{None}. If the attributes can have zero-or-more values (marked with an asterisk), the values are represented as Python lists. -\subsection{Abstract Grammar} +\section{Abstract Grammar} The module defines a string constant \code{__version__} which is the decimal subversion revision number of the file shown below. The abstract grammar is currently defined as follows: -\verbatiminput{../../Parser/Python.asdl} \ No newline at end of file +\verbatiminput{../../Parser/Python.asdl} diff --git a/Doc/lib/libaudioop.tex b/Doc/lib/libaudioop.tex index 76bcdbf..52c6f3d 100644 --- a/Doc/lib/libaudioop.tex +++ b/Doc/lib/libaudioop.tex @@ -12,9 +12,10 @@ is the same format as used by the \refmodule{al} and \refmodule{sunaudiodev} modules. All scalar items are integers, unless specified otherwise. % This para is mostly here to provide an excuse for the index entries... -This module provides support for u-LAW and Intel/DVI ADPCM encodings. +This module provides support for a-LAW, u-LAW and Intel/DVI ADPCM encodings. \index{Intel/DVI ADPCM} \index{ADPCM, Intel/DVI} +\index{a-LAW} \index{u-LAW} A few of the more complicated operations only take 16-bit samples, @@ -42,6 +43,13 @@ Return a tuple \code{(\var{sample}, \var{newstate})} where the sample has the width specified in \var{width}. \end{funcdesc} +\begin{funcdesc}{alaw2lin}{fragment, width} +Convert sound fragments in a-LAW encoding to linearly encoded sound +fragments. a-LAW encoding always uses 8 bits samples, so \var{width} +refers only to the sample width of the output fragment here. +\versionadded{2.5} +\end{funcdesc} + \begin{funcdesc}{avg}{fragment, width} Return the average over all samples in the fragment. \end{funcdesc} @@ -98,10 +106,6 @@ The routine takes time proportional to \code{len(\var{fragment})}. Return the value of sample \var{index} from the fragment. \end{funcdesc} -\begin{funcdesc}{lin2lin}{fragment, width, newwidth} -Convert samples between 1-, 2- and 4-byte formats. -\end{funcdesc} - \begin{funcdesc}{lin2adpcm}{fragment, width, state} Convert samples to 4 bit Intel/DVI ADPCM encoding. ADPCM coding is an adaptive coding scheme, whereby each 4 bit number is the difference @@ -117,6 +121,18 @@ passed as the state. \var{adpcmfrag} is the ADPCM coded fragment packed 2 4-bit values per byte. \end{funcdesc} +\begin{funcdesc}{lin2alaw}{fragment, width} +Convert samples in the audio fragment to a-LAW encoding and return +this as a Python string. a-LAW is an audio encoding format whereby +you get a dynamic range of about 13 bits using only 8 bit samples. It +is used by the Sun audio hardware, among others. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{lin2lin}{fragment, width, newwidth} +Convert samples between 1-, 2- and 4-byte formats. +\end{funcdesc} + \begin{funcdesc}{lin2ulaw}{fragment, width} Convert samples in the audio fragment to u-LAW encoding and return this as a Python string. u-LAW is an audio encoding format whereby diff --git a/Doc/lib/libbsddb.tex b/Doc/lib/libbsddb.tex index fa7bb4b..a5cda6d 100644 --- a/Doc/lib/libbsddb.tex +++ b/Doc/lib/libbsddb.tex @@ -15,9 +15,8 @@ other objects as keys or to store other kinds of objects the user must serialize them somehow, typically using \function{marshal.dumps()} or \function{pickle.dumps}. -Starting with Python 2.3 the \module{bsddb} module requires the -Berkeley DB library version 3.2 or later (it is known to work with 3.2 -through 4.3 at the time of this writing). +The \module{bsddb} module requires a Berkeley DB library version from +3.3 thru 4.4. \begin{seealso} \seeurl{http://pybsddb.sourceforge.net/}{Website with documentation diff --git a/Doc/lib/libcalendar.tex b/Doc/lib/libcalendar.tex index bf3a7d6..acfd2da 100644 --- a/Doc/lib/libcalendar.tex +++ b/Doc/lib/libcalendar.tex @@ -15,12 +15,177 @@ convention). Use \function{setfirstweekday()} to set the first day of the week to Sunday (6) or to any other weekday. Parameters that specify dates are given as integers. -Most of these functions rely on the \module{datetime} module which -uses an idealized calendar, the current Gregorian calendar indefinitely -extended in both directions. This matches the definition of the -"proleptic Gregorian" calendar in Dershowitz and Reingold's book -"Calendrical Calculations", where it's the base calendar for all -computations. +Most of these functions and classses rely on the \module{datetime} +module which uses an idealized calendar, the current Gregorian +calendar indefinitely extended in both directions. This matches +the definition of the "proleptic Gregorian" calendar in Dershowitz +and Reingold's book "Calendrical Calculations", where it's the +base calendar for all computations. + +\begin{classdesc}{Calendar}{\optional{firstweekday}} +Creates a \class{Calendar} object. \var{firstweekday} is an integer +specifying the first day of the week. \code{0} is Monday (the default), +\code{6} is Sunday. + +A \class{Calendar} object provides several methods that can +be used for preparing the calendar data for formatting. This +class doesn't do any formatting itself. This is the job of +subclasses. +\versionadded{2.5} +\end{classdesc} + +\class{Calendar} instances have the following methods: + +\begin{methoddesc}{iterweekdays}{weekday} +Return an iterator for the week day numbers that will be used +for one week. The first number from the iterator will be the +same as the number returned by \method{firstweekday()}. +\end{methoddesc} + +\begin{methoddesc}{itermonthdates}{year, month} +Return an iterator for the month \var{month} (1-12) in the +year \var{year}. This iterator will return all days (as +\class{datetime.date} objects) for the month and all days +before the start of the month or after the end of the month +that are required to get a complete week. +\end{methoddesc} + +\begin{methoddesc}{itermonthdays2}{year, month} +Return an iterator for the month \var{month} in the year +\var{year} similar to \method{itermonthdates()}. Days returned +will be tuples consisting of a day number and a week day +number. +\end{methoddesc} + +\begin{methoddesc}{itermonthdays}{year, month} +Return an iterator for the month \var{month} in the year +\var{year} similar to \method{itermonthdates()}. Days returned +will simply be day numbers. +\end{methoddesc} + +\begin{methoddesc}{monthdatescalendar}{year, month} +Return a list of the weeks in the month \var{month} of +the \var{year} as full weeks. Weeks are lists of seven +\class{datetime.date} objects. +\end{methoddesc} + +\begin{methoddesc}{monthdays2calendar}{year, month} +Return a list of the weeks in the month \var{month} of +the \var{year} as full weeks. Weeks are lists of seven +tuples of day numbers and weekday numbers. +\end{methoddesc} + +\begin{methoddesc}{monthdayscalendar}{year, month} +Return a list of the weeks in the month \var{month} of +the \var{year} as full weeks. Weeks are lists of seven +day numbers. +\end{methoddesc} + +\begin{methoddesc}{yeardatescalendar}{year, month\optional{, width}} +Return the data for the specified year ready for formatting. The return +value is a list of month rows. Each month row contains up to \var{width} +months (defaulting to 3). Each month contains between 4 and 6 weeks and +each week contains 1--7 days. Days are \class{datetime.date} objects. +\end{methoddesc} + +\begin{methoddesc}{yeardays2calendar}{year, month\optional{, width}} +Return the data for the specified year ready for formatting (similar to +\method{yeardatescalendar()}). Entries in the week lists are tuples of +day numbers and weekday numbers. Day numbers outside this month are zero. +\end{methoddesc} + +\begin{methoddesc}{yeardayscalendar}{year, month\optional{, width}} +Return the data for the specified year ready for formatting (similar to +\method{yeardatescalendar()}). Entries in the week lists are day numbers. +Day numbers outside this month are zero. +\end{methoddesc} + + +\begin{classdesc}{TextCalendar}{\optional{firstweekday}} +This class can be used to generate plain text calendars. + +\versionadded{2.5} +\end{classdesc} + +\class{TextCalendar} instances have the following methods: + +\begin{methoddesc}{formatmonth}{theyear, themonth\optional{, w\optional{, l}}} +Return a month's calendar in a multi-line string. If \var{w} is +provided, it specifies the width of the date columns, which are +centered. If \var{l} is given, it specifies the number of lines that +each week will use. Depends on the first weekday as set by +\function{setfirstweekday()}. +\end{methoddesc} + +\begin{methoddesc}{prmonth}{theyear, themonth\optional{, w\optional{, l}}} +Print a month's calendar as returned by \method{formatmonth()}. +\end{methoddesc} + +\begin{methoddesc}{formatyear}{theyear, themonth\optional{, w\optional{, + l\optional{, c\optional{, m}}}}} +Return a \var{m}-column calendar for an entire year as a multi-line string. +Optional parameters \var{w}, \var{l}, and \var{c} are for date column +width, lines per week, and number of spaces between month columns, +respectively. Depends on the first weekday as set by +\method{setfirstweekday()}. The earliest year for which a calendar can +be generated is platform-dependent. +\end{methoddesc} + +\begin{methoddesc}{pryear}{theyear\optional{, w\optional{, l\optional{, + c\optional{, m}}}}} +Print the calendar for an entire year as returned by \method{formatyear()}. +\end{methoddesc} + + +\begin{classdesc}{HTMLCalendar}{\optional{firstweekday}} +This class can be used to generate HTML calendars. + +\versionadded{2.5} +\end{classdesc} + +\class{HTMLCalendar} instances have the following methods: + +\begin{methoddesc}{formatmonth}{theyear, themonth\optional{, withyear}} +Return a month's calendar as an HTML table. If \var{withyear} is +true the year will be included in the header, otherwise just the +month name will be used. +\end{methoddesc} + +\begin{methoddesc}{formatyear}{theyear, themonth\optional{, width}} +Return a year's calendar as an HTML table. \var{width} (defaulting to 3) +specifies the number of months per row. +\end{methoddesc} + +\begin{methoddesc}{formatyearpage}{theyear, themonth\optional{, + width\optional{, css\optional{, encoding}}}} +Return a year's calendar as a complete HTML page. \var{width} +(defaulting to 3) specifies the number of months per row. \var{css} +is the name for the cascading style sheet to be used. \constant{None} +can be passed if no style sheet should be used. \var{encoding} +specifies the encoding to be used for the output (defaulting +to the system default encoding). +\end{methoddesc} + + +\begin{classdesc}{LocaleTextCalendar}{\optional{firstweekday\optional{, locale}}} +This subclass of \class{TextCalendar} can be passed a locale name in the +constructor and will return month and weekday names in the specified locale. +If this locale includes an encoding all strings containing month and weekday +names will be returned as unicode. +\versionadded{2.5} +\end{classdesc} + + +\begin{classdesc}{LocaleHTMLCalendar}{\optional{firstweekday\optional{, locale}}} +This subclass of \class{HTMLCalendar} can be passed a locale name in the +constructor and will return month and weekday names in the specified locale. +If this locale includes an encoding all strings containing month and weekday +names will be returned as unicode. +\versionadded{2.5} +\end{classdesc} + + +For simple text calendars this module provides the following functions. \begin{funcdesc}{setfirstweekday}{weekday} Sets the weekday (\code{0} is Monday, \code{6} is Sunday) to start @@ -80,11 +245,8 @@ Prints a month's calendar as returned by \function{month()}. \end{funcdesc} \begin{funcdesc}{month}{theyear, themonth\optional{, w\optional{, l}}} -Returns a month's calendar in a multi-line string. If \var{w} is -provided, it specifies the width of the date columns, which are -centered. If \var{l} is given, it specifies the number of lines that -each week will use. Depends on the first weekday as set by -\function{setfirstweekday()}. +Returns a month's calendar in a multi-line string using the +\method{formatmonth} of the \class{TextCalendar} class. \versionadded{2.0} \end{funcdesc} @@ -94,12 +256,8 @@ Prints the calendar for an entire year as returned by \end{funcdesc} \begin{funcdesc}{calendar}{year\optional{, w\optional{, l\optional{c}}}} -Returns a 3-column calendar for an entire year as a multi-line string. -Optional parameters \var{w}, \var{l}, and \var{c} are for date column -width, lines per week, and number of spaces between month columns, -respectively. Depends on the first weekday as set by -\function{setfirstweekday()}. The earliest year for which a calendar can -be generated is platform-dependent. +Returns a 3-column calendar for an entire year as a multi-line string +using the \method{formatyear} of the \class{TextCalendar} class. \versionadded{2.0} \end{funcdesc} diff --git a/Doc/lib/libcgi.tex b/Doc/lib/libcgi.tex index cd6f58a..1dd7e03 100644 --- a/Doc/lib/libcgi.tex +++ b/Doc/lib/libcgi.tex @@ -323,7 +323,7 @@ not included. The optional argument \var{strict_parsing} is a flag indicating what to do with parsing errors. If false (the default), errors -are silently ignored. If true, errors raise a ValueError +are silently ignored. If true, errors raise a \exception{ValueError} exception. Use the \function{\refmodule{urllib}.urlencode()} function to convert @@ -347,7 +347,7 @@ not included. The optional argument \var{strict_parsing} is a flag indicating what to do with parsing errors. If false (the default), errors -are silently ignored. If true, errors raise a ValueError +are silently ignored. If true, errors raise a \exception{ValueError} exception. Use the \function{\refmodule{urllib}.urlencode()} function to convert diff --git a/Doc/lib/libcodecs.tex b/Doc/lib/libcodecs.tex index 1806ef0..8a2417e 100644 --- a/Doc/lib/libcodecs.tex +++ b/Doc/lib/libcodecs.tex @@ -112,6 +112,7 @@ class or factory function. Raises a \exception{LookupError} in case the encoding cannot be found or the codec doesn't support an incremental encoder. +\versionadded{2.5} \end{funcdesc} \begin{funcdesc}{getincrementaldecoder}{encoding} @@ -120,6 +121,7 @@ class or factory function. Raises a \exception{LookupError} in case the encoding cannot be found or the codec doesn't support an incremental decoder. +\versionadded{2.5} \end{funcdesc} \begin{funcdesc}{getreader}{encoding} @@ -150,7 +152,7 @@ unencodable part of the input and a position where encoding should continue. The encoder will encode the replacement and continue encoding the original input at the specified position. Negative position values will be treated as being relative to the end of the input string. If the -resulting position is out of bound an IndexError will be raised. +resulting position is out of bound an \exception{IndexError} will be raised. Decoding and translating works similar, except \exception{UnicodeDecodeError} or \exception{UnicodeTranslateError} will be passed to the handler and @@ -229,12 +231,14 @@ an encoding error occurs. Uses an incremental encoder to iteratively encode the input provided by \var{iterable}. This function is a generator. \var{errors} (as well as any other keyword argument) is passed through to the incremental encoder. +\versionadded{2.5} \end{funcdesc} \begin{funcdesc}{iterdecode}{iterable, encoding\optional{, errors}} Uses an incremental decoder to iteratively decode the input provided by \var{iterable}. This function is a generator. \var{errors} (as well as any other keyword argument) is passed through to the incremental encoder. +\versionadded{2.5} \end{funcdesc} The module also provides the following constants which are useful @@ -355,6 +359,8 @@ encoded/decoded with the stateless encoder/decoder. \subsubsection{IncrementalEncoder Objects \label{incremental-encoder-objects}} +\versionadded{2.5} + The \class{IncrementalEncoder} class is used for encoding an input in multiple steps. It defines the following methods which every incremental encoder must define in order to be compatible to the Python codec registry. @@ -437,6 +443,10 @@ define in order to be compatible to the Python codec registry. Decodes \var{object} (taking the current state of the decoder into account) and returns the resulting decoded object. If this is the last call to \method{decode} \var{final} must be true (the default is false). + If \var{final} is true the decoder must decode the input completely and must + flush all buffers. If this isn't possible (e.g. because of incomplete byte + sequences at the end of the input) it must initiate error handling just like + in the stateless case (which might raise an exception). \end{methoddesc} \begin{methoddesc}{reset}{} @@ -690,10 +700,10 @@ transformation can be done (these methods are also called encodings). The simplest method is to map the codepoints 0-255 to the bytes \code{0x0}-\code{0xff}. This means that a unicode object that contains codepoints above \code{U+00FF} can't be encoded with this method (which -is called \code{'latin-1'} or \code{'iso-8859-1'}). unicode.encode() will -raise a UnicodeEncodeError that looks like this: \samp{UnicodeEncodeError: -'latin-1' codec can't encode character u'\e u1234' in position 3: ordinal -not in range(256)}. +is called \code{'latin-1'} or \code{'iso-8859-1'}). +\function{unicode.encode()} will raise a \exception{UnicodeEncodeError} +that looks like this: \samp{UnicodeEncodeError: 'latin-1' codec can't +encode character u'\e u1234' in position 3: ordinal not in range(256)}. There's another group of encodings (the so called charmap encodings) that choose a different subset of all unicode code points and how @@ -1220,7 +1230,7 @@ listed as operand type in the table. \lineiv{rot_13} {rot13} - {byte string} + {Unicode string} {Returns the Caesar-cypher encryption of the operand} \lineiv{string_escape} diff --git a/Doc/lib/libcollections.tex b/Doc/lib/libcollections.tex index 542ef6b..d9bfa39 100644 --- a/Doc/lib/libcollections.tex +++ b/Doc/lib/libcollections.tex @@ -10,9 +10,11 @@ This module implements high-performance container datatypes. Currently, there are two datatypes, deque and defaultdict. -Future additions may include B-trees and Fibonacci heaps. +Future additions may include balanced trees and ordered dictionaries. \versionchanged[Added defaultdict]{2.5} +\subsection{\class{deque} objects \label{deque-objects}} + \begin{funcdesc}{deque}{\optional{iterable}} Returns a new deque objected initialized left-to-right (using \method{append()}) with data from \var{iterable}. If \var{iterable} @@ -137,7 +139,7 @@ IndexError: pop from an empty deque deque(['c', 'b', 'a']) \end{verbatim} -\subsection{Recipes \label{deque-recipes}} +\subsubsection{Recipes \label{deque-recipes}} This section shows various approaches to working with deques. @@ -215,6 +217,8 @@ def maketree(iterable): +\subsection{\class{defaultdict} objects \label{defaultdict-objects}} + \begin{funcdesc}{defaultdict}{\optional{default_factory\optional{, ...}}} Returns a new dictionary-like object. \class{defaultdict} is a subclass of the builtin \class{dict} class. It overrides one method and adds one @@ -255,3 +259,79 @@ the standard \class{dict} operations: from the first argument to the constructor, if present, or to \code{None}, if absent. \end{datadesc} + + +\subsubsection{\class{defaultdict} Examples \label{defaultdict-examples}} + +Using \class{list} as the \member{default_factory}, it is easy to group +a sequence of key-value pairs into a dictionary of lists: + +\begin{verbatim} +>>> s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)] +>>> d = defaultdict(list) +>>> for k, v in s: + d[k].append(v) + +>>> d.items() +[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])] +\end{verbatim} + +When each key is encountered for the first time, it is not already in the +mapping; so an entry is automatically created using the +\member{default_factory} function which returns an empty \class{list}. The +\method{list.append()} operation then attaches the value to the new list. When +keys are encountered again, the look-up proceeds normally (returning the list +for that key) and the \method{list.append()} operation adds another value to +the list. This technique is simpler and faster than an equivalent technique +using \method{dict.setdefault()}: + +\begin{verbatim} +>>> d = {} +>>> for k, v in s: + d.setdefault(k, []).append(v) + +>>> d.items() +[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])] +\end{verbatim} + +Setting the \member{default_factory} to \class{int} makes the +\class{defaultdict} useful for counting (like a bag or multiset in other +languages): + +\begin{verbatim} +>>> s = 'mississippi' +>>> d = defaultdict(int) +>>> for k in s: + d[k] += 1 + +>>> d.items() +[('i', 4), ('p', 2), ('s', 4), ('m', 1)] +\end{verbatim} + +When a letter is first encountered, it is missing from the mapping, so the +\member{default_factory} function calls \function{int()} to supply a default +count of zero. The increment operation then builds up the count for each +letter. This technique makes counting simpler and faster than an equivalent +technique using \method{dict.get()}: + +\begin{verbatim} +>>> d = {} +>>> for k in s: + d[k] = d.get(k, 0) + 1 + +>>> d.items() +[('i', 4), ('p', 2), ('s', 4), ('m', 1)] +\end{verbatim} + +Setting the \member{default_factory} to \class{set} makes the +\class{defaultdict} useful for building a dictionary of sets: + +\begin{verbatim} +>>> s = [('red', 1), ('blue', 2), ('red', 3), ('blue', 4), ('red', 1), ('blue', 4)] +>>> d = defaultdict(set) +>>> for k, v in s: + d[k].add(v) + +>>> d.items() +[('blue', set([2, 4])), ('red', set([1, 3]))] +\end{verbatim} diff --git a/Doc/lib/libcontextlib.tex b/Doc/lib/libcontextlib.tex new file mode 100644 index 0000000..46f9cdd --- /dev/null +++ b/Doc/lib/libcontextlib.tex @@ -0,0 +1,144 @@ +\section{\module{contextlib} --- + Utilities for \keyword{with}-statement contexts.} + +\declaremodule{standard}{contextlib} +\modulesynopsis{Utilities for \keyword{with}-statement contexts.} + +\versionadded{2.5} + +This module provides utilities for common tasks involving the +\keyword{with} statement. + +Functions provided: + +\begin{funcdesc}{contextmanager}{func} +This function is a decorator that can be used to define context managers +for use with the \keyword{with} statement, without needing to create a +class or separate \method{__enter__()} and \method{__exit__()} methods. + +A simple example: + +\begin{verbatim} +from __future__ import with_statement +from contextlib import contextmanager + +@contextmanager +def tag(name): + print "<%s>" % name + yield + print "" % name + +>>> with tag("h1"): +... print "foo" +... +

+foo +

+\end{verbatim} + +When called, the decorated function must return a generator-iterator. +This iterator must yield exactly one value, which will be bound to the +targets in the \keyword{with} statement's \keyword{as} clause, if any. + +At the point where the generator yields, the block nested in the +\keyword{with} statement is executed. The generator is then resumed +after the block is exited. If an unhandled exception occurs in the +block, it is reraised inside the generator at the point where the yield +occurred. Thus, you can use a +\keyword{try}...\keyword{except}...\keyword{finally} statement to trap +the error (if any), or ensure that some cleanup takes place. + +Note that you can use \code{@contextmanager} to define a context +manager's \method{__context__} method. This is usually more convenient +than creating another class just to serve as a context. For example: + +\begin{verbatim} +from __future__ import with_statement +from contextlib import contextmanager + +class Tag: + def __init__(self, name): + self.name = name + + @contextmanager + def __context__(self): + print "<%s>" % self.name + yield self + print "" % self.name + +h1 = Tag("h1") + +>>> with h1 as me: +... print "hello from", me +

+hello from <__main__.Tag instance at 0x402ce8ec> +

+\end{verbatim} +\end{funcdesc} + +\begin{funcdesc}{nested}{ctx1\optional{, ctx2\optional{, ...}}} +Combine multiple context managers into a single nested context manager. + +Code like this: + +\begin{verbatim} +from contextlib import nested + +with nested(A, B, C) as (X, Y, Z): + do_something() +\end{verbatim} + +is equivalent to this: + +\begin{verbatim} +with A as X: + with B as Y: + with C as Z: + do_something() +\end{verbatim} + +Note that if one of the nested contexts' \method{__exit__()} method +raises an exception, any previous exception state will be lost; the new +exception will be passed to the outer contexts' \method{__exit__()} +method(s), if any. In general, \method{__exit__()} methods should avoid +raising exceptions, and in particular they should not re-raise a +passed-in exception. +\end{funcdesc} + +\label{context-closing} +\begin{funcdesc}{closing}{thing} +Return a context manager that closes \var{thing} upon completion of the +block. This is basically equivalent to: + +\begin{verbatim} +from contextlib import contextmanager + +@contextmanager +def closing(thing): + try: + yield thing + finally: + thing.close() +\end{verbatim} + +And lets you write code like this: +\begin{verbatim} +from __future__ import with_statement +from contextlib import closing +import codecs + +with closing(codecs.open("foo", encoding="utf8")) as f: + for line in f: + print line.encode("latin1") +\end{verbatim} + +without needing to explicitly close \code{f}. Even if an error occurs, +\code{f.close()} will be called when the \keyword{with} block is exited. + +\end{funcdesc} + +\begin{seealso} + \seepep{0343}{The "with" statement} + {The specification, background, and examples for the + Python \keyword{with} statement.} +\end{seealso} diff --git a/Doc/lib/libcookielib.tex b/Doc/lib/libcookielib.tex index a35f97d..ef2d833 100644 --- a/Doc/lib/libcookielib.tex +++ b/Doc/lib/libcookielib.tex @@ -249,7 +249,7 @@ anyway, unless you ask otherwise by passing a true ignore_discard=\constant{False}, ignore_expires=\constant{False}} Save cookies to a file. -This base class raises \class{NotImplementedError}. Subclasses may +This base class raises \exception{NotImplementedError}. Subclasses may leave this method unimplemented. \var{filename} is the name of file in which to save cookies. If diff --git a/Doc/lib/libcsv.tex b/Doc/lib/libcsv.tex index ba0df4f..65053c7 100644 --- a/Doc/lib/libcsv.tex +++ b/Doc/lib/libcsv.tex @@ -33,8 +33,9 @@ form using the \class{DictReader} and \class{DictWriter} classes. \begin{notice} This version of the \module{csv} module doesn't support Unicode input. Also, there are currently some issues regarding \ASCII{} NUL - characters. Accordingly, all input should generally be printable - \ASCII{} to be safe. These restrictions will be removed in the future. + characters. Accordingly, all input should be UTF-8 or printable + \ASCII{} to be safe; see the examples in section~\ref{csv-examples}. + These restrictions will be removed in the future. \end{notice} \begin{seealso} @@ -365,7 +366,7 @@ A read-only description of the dialect in use by the writer. -\subsection{Examples} +\subsection{Examples\label{csv-examples}} The simplest example of reading a CSV file: @@ -426,37 +427,99 @@ for row in csv.reader(['one,two,three']): \end{verbatim} The \module{csv} module doesn't directly support reading and writing -Unicode, but it is 8-bit clean save for some problems with \ASCII{} NUL -characters, so you can write classes that handle the encoding and decoding -for you as long as you avoid encodings like utf-16 that use NULs: +Unicode, but it is 8-bit-clean save for some problems with \ASCII{} NUL +characters. So you can write functions or classes that handle the +encoding and decoding for you as long as you avoid encodings like +UTF-16 that use NULs. UTF-8 is recommended. + +\function{unicode_csv_reader} below is a generator that wraps +\class{csv.reader} to handle Unicode CSV data (a list of Unicode +strings). \function{utf_8_encoder} is a generator that encodes the +Unicode strings as UTF-8, one string (or row) at a time. The encoded +strings are parsed by the CSV reader, and +\function{unicode_csv_reader} decodes the UTF-8-encoded cells back +into Unicode: \begin{verbatim} import csv +def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs): + # csv.py doesn't do Unicode; encode temporarily as UTF-8: + csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), + dialect=dialect, **kwargs) + for row in csv_reader: + # decode UTF-8 back to Unicode, cell by cell: + yield [unicode(cell, 'utf-8') for cell in row] + +def utf_8_encoder(unicode_csv_data): + for line in unicode_csv_data: + yield line.encode('utf-8') +\end{verbatim} + +For all other encodings the following \class{UnicodeReader} and +\class{UnicodeWriter} classes can be used. They take an additional +\var{encoding} parameter in their constructor and make sure that the data +passes the real reader or writer encoded as UTF-8: + +\begin{verbatim} +import csv, codecs, cStringIO + +class UTF8Recoder: + """ + Iterator that reads an encoded stream and reencodes the input to UTF-8 + """ + def __init__(self, f, encoding): + self.reader = codecs.getreader(encoding)(f) + + def __iter__(self): + return self + + def next(self): + return self.reader.next().encode("utf-8") + class UnicodeReader: + """ + A CSV reader which will iterate over lines in the CSV file "f", + which is encoded in the given encoding. + """ + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): + f = UTF8Recoder(f, encoding) self.reader = csv.reader(f, dialect=dialect, **kwds) - self.encoding = encoding def next(self): row = self.reader.next() - return [unicode(s, self.encoding) for s in row] + return [unicode(s, "utf-8") for s in row] def __iter__(self): return self class UnicodeWriter: + """ + A CSV writer which will write rows to CSV file "f", + which is encoded in the given encoding. + """ + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - self.writer = csv.writer(f, dialect=dialect, **kwds) - self.encoding = encoding + # Redirect output to a queue + self.queue = cStringIO.StringIO() + self.writer = csv.writer(self.queue, dialect=dialect, **kwds) + self.stream = f + self.encoder = codecs.getincrementalencoder(encoding)() def writerow(self, row): - self.writer.writerow([s.encode(self.encoding) for s in row]) + self.writer.writerow([s.encode("utf-8") for s in row]) + # Fetch UTF-8 output from the queue ... + data = self.queue.getvalue() + data = data.decode("utf-8") + # ... and reencode it into the target encoding + data = self.encoder.encode(data) + # write to the target stream + self.stream.write(data) + # empty queue + self.queue.truncate(0) def writerows(self, rows): for row in rows: self.writerow(row) \end{verbatim} - -They should work just like the \class{csv.reader} and \class{csv.writer} -classes but add an \var{encoding} parameter. diff --git a/Doc/lib/libdatetime.tex b/Doc/lib/libdatetime.tex index 4bba553..cae5d60 100644 --- a/Doc/lib/libdatetime.tex +++ b/Doc/lib/libdatetime.tex @@ -504,7 +504,7 @@ Instance methods: Return a string representing the date, controlled by an explicit format string. Format codes referring to hours, minutes or seconds will see 0 values. - See the section on \method{strftime()} behavior. + See section~\ref{strftime-behavior} -- \method{strftime()} behavior. \end{methoddesc} @@ -970,8 +970,8 @@ Instance methods: \begin{methoddesc}{strftime}{format} Return a string representing the date and time, controlled by an - explicit format string. See the section on \method{strftime()} - behavior. + explicit format string. See section~\ref{strftime-behavior} -- + \method{strftime()} behavior. \end{methoddesc} @@ -1100,7 +1100,8 @@ Instance methods: \begin{methoddesc}{strftime}{format} Return a string representing the time, controlled by an explicit - format string. See the section on \method{strftime()} behavior. + format string. See section~\ref{strftime-behavior} -- + \method{strftime()} behavior. \end{methoddesc} \begin{methoddesc}{utcoffset}{} @@ -1368,7 +1369,7 @@ representing only EST (fixed offset -5 hours), or only EDT (fixed offset -4 hours)). -\subsection{\method{strftime()} Behavior} +\subsection{\method{strftime()} Behavior\label{strftime-behavior}} \class{date}, \class{datetime}, and \class{time} objects all support a \code{strftime(\var{format})} diff --git a/Doc/lib/libdecimal.tex b/Doc/lib/libdecimal.tex index 092f038..ffc3363 100644 --- a/Doc/lib/libdecimal.tex +++ b/Doc/lib/libdecimal.tex @@ -442,9 +442,33 @@ the \function{getcontext()} and \function{setcontext()} functions: Set the current context for the active thread to \var{c}. \end{funcdesc} -New contexts can formed using the \class{Context} constructor described below. -In addition, the module provides three pre-made contexts: +Beginning with Python 2.5, you can also use the \keyword{with} statement +to temporarily change the active context. For example the following code +increases the current decimal precision by 2 places, performs a +calculation, and then automatically restores the previous context: +\begin{verbatim} +from __future__ import with_statement +import decimal + +with decimal.getcontext() as ctx: + ctx.prec += 2 # add 2 more digits of precision + calculate_something() +\end{verbatim} + +The context that's active in the body of the \keyword{with} statement is +a \emph{copy} of the context you provided to the \keyword{with} +statement, so modifying its attributes doesn't affect anything except +that temporary copy. + +You can use any decimal context in a \keyword{with} statement, but if +you just want to make a temporary change to some aspect of the current +context, it's easiest to just use \function{getcontext()} as shown +above. + +New contexts can also be created using the \class{Context} constructor +described below. In addition, the module provides three pre-made +contexts: \begin{classdesc*}{BasicContext} This is a standard context defined by the General Decimal Arithmetic diff --git a/Doc/lib/libfuncs.tex b/Doc/lib/libfuncs.tex index 0be3aa9..c0352d3 100644 --- a/Doc/lib/libfuncs.tex +++ b/Doc/lib/libfuncs.tex @@ -6,7 +6,7 @@ are always available. They are listed here in alphabetical order. \setindexsubitem{(built-in function)} -\begin{funcdesc}{__import__}{name\optional{, globals\optional{, locals\optional{, fromlist}}}} +\begin{funcdesc}{__import__}{name\optional{, globals\optional{, locals\optional{, fromlist\optional{, level}}}}} This function is invoked by the \keyword{import}\stindex{import} statement. It mainly exists so that you can replace it with another function that has a compatible interface, in order to change the @@ -20,9 +20,9 @@ are always available. They are listed here in alphabetical order. For example, the statement \samp{import spam} results in the following call: \code{__import__('spam',} \code{globals(),} - \code{locals(), [])}; the statement \samp{from spam.ham import eggs} + \code{locals(), [], -1)}; the statement \samp{from spam.ham import eggs} results in \samp{__import__('spam.ham', globals(), locals(), - ['eggs'])}. Note that even though \code{locals()} and + ['eggs'], -1)}. Note that even though \code{locals()} and \code{['eggs']} are passed in as arguments, the \function{__import__()} function does not set the local variable named \code{eggs}; this is done by subsequent code that is generated @@ -52,6 +52,15 @@ def my_import(name): mod = getattr(mod, comp) return mod \end{verbatim} + + \var{level} specifies whether to use absolute or relative imports. + The default is \code{-1} which indicates both absolute and relative + imports will be attempted. \code{0} means only perform absolute imports. + Positive values for \var{level} indicate the number of parent directories + to search relative to the directory of the module calling + \function{__import__}. +\versionchanged[The level parameter was added]{2.5} +\versionchanged[Keyword support for parameters was added]{2.5} \end{funcdesc} \begin{funcdesc}{abs}{x} @@ -683,7 +692,7 @@ class C: \end{funcdesc} \begin{funcdesc}{object}{} - Return a new featureless object. \function{object()} is a base + Return a new featureless object. \class{object} is a base for all new style classes. It has the methods that are common to all instances of new style classes. \versionadded{2.2} @@ -718,8 +727,11 @@ class C: \begin{funcdesc}{pow}{x, y\optional{, z}} Return \var{x} to the power \var{y}; if \var{z} is present, return \var{x} to the power \var{y}, modulo \var{z} (computed more - efficiently than \code{pow(\var{x}, \var{y}) \%\ \var{z}}). The - arguments must have numeric types. With mixed operand types, the + efficiently than \code{pow(\var{x}, \var{y}) \%\ \var{z}}). + The two-argument form \code{pow(\var{x}, \var{y})} is equivalent to using + the power operator: \code{\var{x}**\var{y}}. + + The arguments must have numeric types. With mixed operand types, the coercion rules for binary arithmetic operators apply. For int and long int operands, the result has the same type as the operands (after coercion) unless the second argument is negative; in that diff --git a/Doc/lib/libgc.tex b/Doc/lib/libgc.tex index 54ca26c..0d3408b 100644 --- a/Doc/lib/libgc.tex +++ b/Doc/lib/libgc.tex @@ -35,7 +35,8 @@ Returns true if automatic collection is enabled. \begin{funcdesc}{collect}{\optional{generation}} With no arguments, run a full collection. The optional argument \var{generation} may be an integer specifying which generation to collect -(from 0 to 2). A ValueError is raised if the generation number is invalid. +(from 0 to 2). A \exception{ValueError} is raised if the generation number +is invalid. The number of unreachable objects found is returned. \versionchanged[The optional \var{generation} argument was added]{2.5} diff --git a/Doc/lib/libgetpass.tex b/Doc/lib/libgetpass.tex index 28bfe8f..1d177d3 100644 --- a/Doc/lib/libgetpass.tex +++ b/Doc/lib/libgetpass.tex @@ -11,11 +11,15 @@ The \module{getpass} module provides two functions: -\begin{funcdesc}{getpass}{\optional{prompt}} +\begin{funcdesc}{getpass}{\optional{prompt\optional{, stream}}} Prompt the user for a password without echoing. The user is prompted using the string \var{prompt}, which defaults to - \code{'Password: '}. + \code{'Password: '}. On \UNIX, the prompt is written to the + file-like object \var{stream}, which defaults to + \code{sys.stdout} (this argument is ignored on Windows). + Availability: Macintosh, \UNIX, Windows. + \versionadded[The \var{stream} parameter]{2.5} \end{funcdesc} diff --git a/Doc/lib/libhashlib.tex b/Doc/lib/libhashlib.tex index e9d0b57..62e3fc4 100644 --- a/Doc/lib/libhashlib.tex +++ b/Doc/lib/libhashlib.tex @@ -31,7 +31,7 @@ of the strings fed to it so far using the \method{digest()} or Constructors for hash algorithms that are always present in this module are \function{md5()}, \function{sha1()}, \function{sha224()}, \function{sha256()}, \function{sha384()}, and \function{sha512()}. Additional algorithms may also -be available depending upon the OpenSSL library python uses on your platform. +be available depending upon the OpenSSL library that Python uses on your platform. \index{OpenSSL} For example, to obtain the digest of the string \code{'Nobody inspects diff --git a/Doc/lib/libitertools.tex b/Doc/lib/libitertools.tex index 421d647..20bbc8d 100644 --- a/Doc/lib/libitertools.tex +++ b/Doc/lib/libitertools.tex @@ -276,12 +276,30 @@ by functions or loops that truncate the stream. def izip(*iterables): iterables = map(iter, iterables) while iterables: - result = [i.next() for i in iterables] + result = [it.next() for it in iterables] yield tuple(result) \end{verbatim} \versionchanged[When no iterables are specified, returns a zero length - iterator instead of raising a TypeError exception]{2.4} + iterator instead of raising a \exception{TypeError} + exception]{2.4} + + Note, the left-to-right evaluation order of the iterables is guaranteed. + This makes possible an idiom for clustering a data series into n-length + groups using \samp{izip(*[iter(s)]*n)}. For data that doesn't fit + n-length groups exactly, the last tuple can be pre-padded with fill + values using \samp{izip(*[chain(s, [None]*(n-1))]*n)}. + + Note, when \function{izip()} is used with unequal length inputs, subsequent + iteration over the longer iterables cannot reliably be continued after + \function{izip()} terminates. Potentially, up to one entry will be missing + from each of the left-over iterables. This occurs because a value is fetched + from each iterator in-turn, but the process ends when one of the iterators + terminates. This leaves the last fetched values in limbo (they cannot be + returned in a final, incomplete tuple and they are cannot be pushed back + into the iterator for retrieval with \code{it.next()}). In general, + \function{izip()} should only be used with unequal length inputs when you + don't care about trailing, unmatched values from the longer iterables. \end{funcdesc} \begin{funcdesc}{repeat}{object\optional{, times}} @@ -517,4 +535,9 @@ def pairwise(iterable): pass return izip(a, b) +def grouper(n, iterable, padvalue=None): + "grouper(3, 'abcdefg', 'x') --> ('a','b','c'), ('d','e','f'), ('g','x','x')" + return izip(*[chain(iterable, repeat(padvalue, n-1))]*n) + + \end{verbatim} diff --git a/Doc/lib/liblinecache.tex b/Doc/lib/liblinecache.tex index c022ba9..1477d3c 100644 --- a/Doc/lib/liblinecache.tex +++ b/Doc/lib/liblinecache.tex @@ -15,7 +15,7 @@ the formatted traceback. The \module{linecache} module defines the following functions: -\begin{funcdesc}{getline}{filename, lineno} +\begin{funcdesc}{getline}{filename, lineno\optional{, module_globals}} Get line \var{lineno} from file named \var{filename}. This function will never throw an exception --- it will return \code{''} on errors (the terminating newline character will be included for lines that are @@ -23,7 +23,11 @@ found). If a file named \var{filename} is not found, the function will look for it in the module\indexiii{module}{search}{path} search path, -\code{sys.path}. +\code{sys.path}, after first checking for a \pep{302} \code{__loader__} +in \var{module_globals}, in case the module was imported from a zipfile +or other non-filesystem import source. + +\versionadded[The \var{module_globals} parameter was added]{2.5} \end{funcdesc} \begin{funcdesc}{clearcache}{} diff --git a/Doc/lib/libnntplib.tex b/Doc/lib/libnntplib.tex index 7f14dee..10330ed 100644 --- a/Doc/lib/libnntplib.tex +++ b/Doc/lib/libnntplib.tex @@ -68,48 +68,48 @@ flag \var{readermode} is true, then a \samp{mode reader} command is sent before authentication is performed. Reader mode is sometimes necessary if you are connecting to an NNTP server on the local machine and intend to call reader-specific commands, such as \samp{group}. If -you get unexpected \code{NNTPPermanentError}s, you might need to set +you get unexpected \exception{NNTPPermanentError}s, you might need to set \var{readermode}. \var{readermode} defaults to \code{None}. \var{usenetrc} defaults to \code{True}. \versionchanged[\var{usenetrc} argument added]{2.4} \end{classdesc} -\begin{classdesc}{NNTPError}{} -Derived from the standard exception \code{Exception}, this is the base -class for all exceptions raised by the \code{nntplib} module. -\end{classdesc} +\begin{excdesc}{NNTPError} +Derived from the standard exception \exception{Exception}, this is the +base class for all exceptions raised by the \module{nntplib} module. +\end{excdesc} -\begin{classdesc}{NNTPReplyError}{} +\begin{excdesc}{NNTPReplyError} Exception raised when an unexpected reply is received from the server. For backwards compatibility, the exception \code{error_reply} is equivalent to this class. -\end{classdesc} +\end{excdesc} -\begin{classdesc}{NNTPTemporaryError}{} +\begin{excdesc}{NNTPTemporaryError} Exception raised when an error code in the range 400--499 is received. For backwards compatibility, the exception \code{error_temp} is equivalent to this class. -\end{classdesc} +\end{excdesc} -\begin{classdesc}{NNTPPermanentError}{} +\begin{excdesc}{NNTPPermanentError} Exception raised when an error code in the range 500--599 is received. For backwards compatibility, the exception \code{error_perm} is equivalent to this class. -\end{classdesc} +\end{excdesc} -\begin{classdesc}{NNTPProtocolError}{} +\begin{excdesc}{NNTPProtocolError} Exception raised when a reply is received from the server that does not begin with a digit in the range 1--5. For backwards compatibility, the exception \code{error_proto} is equivalent to this class. -\end{classdesc} +\end{excdesc} -\begin{classdesc}{NNTPDataError}{} +\begin{excdesc}{NNTPDataError} Exception raised when there is some error in the response data. For backwards compatibility, the exception \code{error_data} is equivalent to this class. -\end{classdesc} +\end{excdesc} \subsection{NNTP Objects \label{nntp-objects}} diff --git a/Doc/lib/liboptparse.tex b/Doc/lib/liboptparse.tex index 4ab325b..8aca501 100644 --- a/Doc/lib/liboptparse.tex +++ b/Doc/lib/liboptparse.tex @@ -100,8 +100,8 @@ options; the traditional \UNIX{} syntax is a hyphen (``-'') followed by a single letter, e.g. \code{"-x"} or \code{"-F"}. Also, traditional \UNIX{} syntax allows multiple options to be merged into a single argument, e.g. \code{"-x -F"} is equivalent to \code{"-xF"}. The GNU project -introduced \code{"-{}-"} followed by a series of hyphen-separated words, -e.g. \code{"-{}-file"} or \code{"-{}-dry-run"}. These are the only two option +introduced \code{"{--}"} followed by a series of hyphen-separated words, +e.g. \code{"{--}file"} or \code{"{--}dry-run"}. These are the only two option syntaxes provided by \module{optparse}. Some other option syntaxes that the world has seen include: @@ -170,7 +170,7 @@ For example, consider this hypothetical command-line: prog -v --report /tmp/report.txt foo bar \end{verbatim} -\code{"-v"} and \code{"-{}-report"} are both options. Assuming that +\code{"-v"} and \code{"{--}report"} are both options. Assuming that \longprogramopt{report} takes one argument, \code{"/tmp/report.txt"} is an option argument. \code{"foo"} and \code{"bar"} are positional arguments. @@ -587,7 +587,7 @@ programmer errors and user errors. Programmer errors are usually erroneous calls to \code{parse.add{\_}option()}, e.g. invalid option strings, unknown option attributes, missing option attributes, etc. These are dealt with in the usual way: raise an exception (either -\code{optparse.OptionError} or \code{TypeError}) and let the program crash. +\exception{optparse.OptionError} or \exception{TypeError}) and let the program crash. Handling user errors is much more important, since they are guaranteed to happen no matter how stable your code is. \module{optparse} can automatically @@ -1019,9 +1019,9 @@ callback) as-is. Integer arguments are passed to \code{int()} to convert them to Python integers. If \code{int()} fails, so will \module{optparse}, although with a more -useful error message. (Internally, \module{optparse} raises OptionValueError; -OptionParser catches this exception higher up and terminates your -program with a useful error message.) +useful error message. (Internally, \module{optparse} raises +\exception{OptionValueError}; OptionParser catches this exception higher +up and terminates your program with a useful error message.) Likewise, \code{float} arguments are passed to \code{float()} for conversion, \code{long} arguments to \code{long()}, and \code{complex} arguments to @@ -1032,7 +1032,7 @@ arguments. option attribute (a sequence of strings) defines the set of allowed option arguments. \code{optparse.option.check{\_}choice()} compares user-supplied option arguments against this master list and raises -OptionValueError if an invalid string is given. +\exception{OptionValueError} if an invalid string is given. \subsubsection{Querying and manipulating your option parser\label{optparse-querying-manipulating-option-parser}} @@ -1052,7 +1052,7 @@ that option is removed. If that option provided any other option strings, all of those option strings become invalid. If \code{opt{\_}str} does not occur in any option belonging to this -OptionParser, raises ValueError. +OptionParser, raises \exception{ValueError}. \end{description} @@ -1087,7 +1087,7 @@ The available conflict-handling mechanisms are: \begin{description} \item[\code{error} (default)] assume option conflicts are a programming error and raise -OptionConflictError +\exception{OptionConflictError} \item[\code{resolve}] resolve option conflicts intelligently (see below) \end{description} @@ -1260,7 +1260,7 @@ is a dictionary of arbitrary keyword arguments supplied via \subsubsection{Raising errors in a callback\label{optparse-raising-errors-in-callback}} -The callback function should raise OptionValueError if there are any +The callback function should raise \exception{OptionValueError} if there are any problems with the option or its argument(s). \module{optparse} catches this and terminates the program, printing the error message you supply to stderr. Your message should be clear, concise, accurate, and mention diff --git a/Doc/lib/libos.tex b/Doc/lib/libos.tex index 9af5889..9ded3ae 100644 --- a/Doc/lib/libos.tex +++ b/Doc/lib/libos.tex @@ -343,6 +343,10 @@ Availability: Macintosh, \UNIX, Windows. \versionchanged[When specified, the \var{mode} argument must now start with one of the letters \character{r}, \character{w}, or \character{a}, otherwise a \exception{ValueError} is raised]{2.3} +\versionchanged[On \UNIX, when the \var{mode} argument starts with + \character{a}, the \var{O_APPEND} flag is set on the file descriptor + (which the \cfunction{fdopen()} implementation already does on most + platforms)]{2.5} \end{funcdesc} \begin{funcdesc}{popen}{command\optional{, mode\optional{, bufsize}}} @@ -547,7 +551,8 @@ documentation; flag constants (like \constant{O_RDONLY} and This function is intended for low-level I/O. For normal usage, use the built-in function \function{open()}, which returns a ``file object'' with \method{read()} and \method{write()} methods (and many -more). +more). To wrap a file descriptor in a ``file object'', use +\function{fdopen()}. \end{notice} \end{funcdesc} @@ -1731,6 +1736,27 @@ The \function{spawn()} functions called with \constant{P_NOWAIT} return suitable process handles. \end{funcdesc} +\begin{funcdesc}{wait3}{\optional{options}} +Similar to \function{waitpid()}, except no process id argument is given and +a 3-element tuple containing the child's process id, exit status indication, +and resource usage information is returned. Refer to +\module{resource}.\function{getrusage()} +for details on resource usage information. The option argument is the same +as that provided to \function{waitpid()} and \function{wait4()}. +Availability: \UNIX. +\versionadded{2.5} +\end{funcdesc} + +\begin{funcdesc}{wait4}{pid, options} +Similar to \function{waitpid()}, except a 3-element tuple, containing the +child's process id, exit status indication, and resource usage information +is returned. Refer to \module{resource}.\function{getrusage()} for details +on resource usage information. The arguments to \function{wait4()} are +the same as those provided to \function{waitpid()}. +Availability: \UNIX. +\versionadded{2.5} +\end{funcdesc} + \begin{datadesc}{WNOHANG} The option for \function{waitpid()} to return immediately if no child process status is available immediately. The function returns @@ -1818,14 +1844,14 @@ Return string-valued system configuration values. string which is the name of a defined system value; these names are specified in a number of standards (\POSIX, \UNIX{} 95, \UNIX{} 98, and others). Some platforms define additional names as well. The names -known to the host operating system are given in the +known to the host operating system are given as the keys of the \code{confstr_names} dictionary. For configuration variables not included in that mapping, passing an integer for \var{name} is also accepted. Availability: Macintosh, \UNIX. -If the configuration value specified by \var{name} isn't defined, the -empty string is returned. +If the configuration value specified by \var{name} isn't defined, +\code{None} is returned. If \var{name} is a string and is not known, \exception{ValueError} is raised. If a specific value for \var{name} is not supported by the diff --git a/Doc/lib/libossaudiodev.tex b/Doc/lib/libossaudiodev.tex index ec79e9e..223cf28 100644 --- a/Doc/lib/libossaudiodev.tex +++ b/Doc/lib/libossaudiodev.tex @@ -311,7 +311,7 @@ The mixer object provides two file-like methods: \begin{methoddesc}[mixer device]{close}{} This method closes the open mixer device file. Any further attempts to -use the mixer after this file is closed will raise an IOError. +use the mixer after this file is closed will raise an \exception{IOError}. \end{methoddesc} \begin{methoddesc}[mixer device]{fileno}{} diff --git a/Doc/lib/libpdb.tex b/Doc/lib/libpdb.tex index 6301175..a5b36a6 100644 --- a/Doc/lib/libpdb.tex +++ b/Doc/lib/libpdb.tex @@ -240,6 +240,45 @@ Condition is an expression which must evaluate to true before the breakpoint is honored. If condition is absent, any existing condition is removed; i.e., the breakpoint is made unconditional. +\item[commands \optional{\var{bpnumber}}] + +Specify a list of commands for breakpoint number \var{bpnumber}. The +commands themselves appear on the following lines. Type a line +containing just 'end' to terminate the commands. An example: + +\begin{verbatim} +(Pdb) commands 1 +(com) print some_variable +(com) end +(Pdb) +\end{verbatim} + +To remove all commands from a breakpoint, type commands and +follow it immediately with end; that is, give no commands. + +With no \var{bpnumber} argument, commands refers to the last +breakpoint set. + +You can use breakpoint commands to start your program up again. +Simply use the continue command, or step, or any other +command that resumes execution. + +Specifying any command resuming execution (currently continue, +step, next, return, jump, quit and their abbreviations) terminates +the command list (as if that command was immediately followed by end). +This is because any time you resume execution +(even with a simple next or step), you may encounter· +another breakpoint--which could have its own command list, leading to +ambiguities about which list to execute. + + If you use the 'silent' command in the command list, the +usual message about stopping at a breakpoint is not printed. This may +be desirable for breakpoints that are to print a specific message and +then continue. If none of the other commands print anything, you +see no sign that the breakpoint was reached. + +\versionadded{2.5} + \item[s(tep)] Execute the current line, stop at the first possible occasion diff --git a/Doc/lib/libprofile.tex b/Doc/lib/libprofile.tex index afc9694..9ff5ba0 100644 --- a/Doc/lib/libprofile.tex +++ b/Doc/lib/libprofile.tex @@ -124,7 +124,7 @@ layer on top of the internal \module{_lsprof} module. The %\end{description} -\section{Instant Users Manual \label{profile-instant}} +\section{Instant User's Manual \label{profile-instant}} This section is provided for users that ``don't want to read the manual.'' It provides a very brief overview, and allows a user to @@ -391,17 +391,17 @@ Analysis of the profiler data is done using this class from the % (This \stmodindex use may be hard to change ;-( ) \stmodindex{pstats} -\begin{classdesc}{Stats}{filename\optional{, \moreargs}} +\begin{classdesc}{Stats}{filename\optional{, \moreargs\optional{, stream=sys.stdout}}} This class constructor creates an instance of a ``statistics object'' from a \var{filename} (or set of filenames). \class{Stats} objects are -manipulated by methods, in order to print useful reports. - -The file selected by the above constructor must have been created by -the corresponding version of \module{profile} or \module{cProfile}. -To be specific, there is -\emph{no} file compatibility guaranteed with future versions of this -profiler, and there is no compatibility with files produced by other -profilers. +manipulated by methods, in order to print useful reports. You may specify +an alternate output stream by giving the keyword argument, \code{stream}. + +The file selected by the above constructor must have been created by the +corresponding version of \module{profile} or \module{cProfile}. To be +specific, there is \emph{no} file compatibility guaranteed with future +versions of this profiler, and there is no compatibility with files produced +by other profilers. %(such as the old system profiler). If several files are provided, all the statistics for identical diff --git a/Doc/lib/libpycompile.tex b/Doc/lib/libpycompile.tex index 0458191..85f0aaa 100644 --- a/Doc/lib/libpycompile.tex +++ b/Doc/lib/libpycompile.tex @@ -30,9 +30,10 @@ Exception raised when an error occurs while attempting to compile the file. \code{+} \code{'c'} (\code{'o'} if optimization is enabled in the current interpreter). If \var{dfile} is specified, it is used as the name of the source file in error messages instead of \var{file}. - If \var{doraise} = True, a PyCompileError is raised when an error is - encountered while compiling \var{file}. If \var{doraise} = False (the default), - an error string is written to sys.stderr, but no exception is raised. + If \var{doraise} is true, a \exception{PyCompileError} is raised when + an error is encountered while compiling \var{file}. If \var{doraise} + is false (the default), an error string is written to \code{sys.stderr}, + but no exception is raised. \end{funcdesc} \begin{funcdesc}{main}{\optional{args}} diff --git a/Doc/lib/libqueue.tex b/Doc/lib/libqueue.tex index f1d892a..95ad47f 100644 --- a/Doc/lib/libqueue.tex +++ b/Doc/lib/libqueue.tex @@ -1,3 +1,4 @@ + \section{\module{Queue} --- A synchronized queue class} @@ -94,3 +95,51 @@ immediately available, else raise the \exception{Empty} exception \begin{methoddesc}{get_nowait}{} Equivalent to \code{get(False)}. \end{methoddesc} + +Two methods are offered to support tracking whether enqueued tasks have +been fully processed by daemon consumer threads. + +\begin{methoddesc}{task_done}{} +Indicate that a formerly enqueued task is complete. Used by queue consumer +threads. For each \method{get()} used to fetch a task, a subsequent call to +\method{task_done()} tells the queue that the processing on the task is complete. + +If a \method{join()} is currently blocking, it will resume when all items +have been processed (meaning that a \method{task_done()} call was received +for every item that had been \method{put()} into the queue). + +Raises a \exception{ValueError} if called more times than there were items +placed in the queue. +\versionadded{2.5} +\end{methoddesc} + +\begin{methoddesc}{join}{} +Blocks until all items in the queue have been gotten and processed. + +The count of unfinished tasks goes up whenever an item is added to the +queue. The count goes down whenever a consumer thread calls \method{task_done()} +to indicate that the item was retrieved and all work on it is complete. +When the count of unfinished tasks drops to zero, join() unblocks. +\versionadded{2.5} +\end{methoddesc} + +Example of how to wait for enqueued tasks to be completed: + +\begin{verbatim} + def worker(): + while True: + item = q.get() + do_work(item) + q.task_done() + + q = Queue() + for i in range(num_worker_threads): + t = Thread(target=worker) + t.setDaemon(True) + t.start() + + for item in source(): + q.put(item) + + q.join() # block until all tasks are done +\end{verbatim} diff --git a/Doc/lib/libre.tex b/Doc/lib/libre.tex index 8e6513a..1404e09 100644 --- a/Doc/lib/libre.tex +++ b/Doc/lib/libre.tex @@ -566,9 +566,6 @@ ignored. >>> re.split('\W+', 'Words, words, words.', 1) ['Words', 'words, words.'] \end{verbatim} - - This function combines and extends the functionality of - the old \function{regsub.split()} and \function{regsub.splitx()}. \end{funcdesc} \begin{funcdesc}{findall}{pattern, string\optional{, flags}} @@ -934,7 +931,7 @@ The equivalent regular expression would be \leftline{\strong{Avoiding recursion}} If you create regular expressions that require the engine to perform a -lot of recursion, you may encounter a RuntimeError exception with +lot of recursion, you may encounter a \exception{RuntimeError} exception with the message \code{maximum recursion limit} exceeded. For example, \begin{verbatim} @@ -943,7 +940,7 @@ the message \code{maximum recursion limit} exceeded. For example, >>> re.match('Begin (\w| )*? end', s).end() Traceback (most recent call last): File "", line 1, in ? - File "/usr/local/lib/python2.3/sre.py", line 132, in match + File "/usr/local/lib/python2.5/re.py", line 132, in match return _compile(pattern, flags).match(string) RuntimeError: maximum recursion limit exceeded \end{verbatim} diff --git a/Doc/lib/libreconvert.tex b/Doc/lib/libreconvert.tex deleted file mode 100644 index 29c6e52..0000000 --- a/Doc/lib/libreconvert.tex +++ /dev/null @@ -1,80 +0,0 @@ -\section{\module{reconvert} --- - Convert regular expressions from regex to re form} -\declaremodule{standard}{reconvert} -\moduleauthor{Andrew M. Kuchling}{amk@amk.ca} -\sectionauthor{Skip Montanaro}{skip@pobox.com} - - -\modulesynopsis{Convert regex-, emacs- or sed-style regular expressions -to re-style syntax.} - - -This module provides a facility to convert regular expressions from the -syntax used by the deprecated \module{regex} module to those used by the -newer \module{re} module. Because of similarity between the regular -expression syntax of \code{sed(1)} and \code{emacs(1)} and the -\module{regex} module, it is also helpful to convert patterns written for -those tools to \module{re} patterns. - -When used as a script, a Python string literal (or any other expression -evaluating to a string) is read from stdin, and the translated expression is -written to stdout as a string literal. Unless stdout is a tty, no trailing -newline is written to stdout. This is done so that it can be used with -Emacs \code{C-U M-|} (shell-command-on-region) which filters the region -through the shell command. - -\begin{seealso} - \seetitle{Mastering Regular Expressions}{Book on regular expressions - by Jeffrey Friedl, published by O'Reilly. The second - edition of the book no longer covers Python at all, - but the first edition covered writing good regular expression - patterns in great detail.} -\end{seealso} - -\subsection{Module Contents} -\nodename{Contents of Module reconvert} - -The module defines two functions and a handful of constants. - -\begin{funcdesc}{convert}{pattern\optional{, syntax=None}} - Convert a \var{pattern} representing a \module{regex}-stype regular - expression into a \module{re}-style regular expression. The optional - \var{syntax} parameter is a bitwise-or'd set of flags that control what - constructs are converted. See below for a description of the various - constants. -\end{funcdesc} - -\begin{funcdesc}{quote}{s\optional{, quote=None}} - Convert a string object to a quoted string literal. - - This is similar to \function{repr} but will return a "raw" string (r'...' - or r"...") when the string contains backslashes, instead of doubling all - backslashes. The resulting string does not always evaluate to the same - string as the original; however it will do just the right thing when passed - into re.compile(). - - The optional second argument forces the string quote; it must be a single - character which is a valid Python string quote. Note that prior to Python - 2.5 this would not accept triple-quoted string delimiters. -\end{funcdesc} - -\begin{datadesc}{RE_NO_BK_PARENS} - Suppress paren conversion. This should be omitted when converting - \code{sed}-style or \code{emacs}-style regular expressions. -\end{datadesc} - -\begin{datadesc}{RE_NO_BK_VBAR} - Suppress vertical bar conversion. This should be omitted when converting - \code{sed}-style or \code{emacs}-style regular expressions. -\end{datadesc} - -\begin{datadesc}{RE_BK_PLUS_QM} - Enable conversion of \code{+} and \code{?} characters. This should be - added to the \var{syntax} arg of \function{convert} when converting - \code{sed}-style regular expressions and omitted when converting - \code{emacs}-style regular expressions. -\end{datadesc} - -\begin{datadesc}{RE_NEWLINE_OR} - When set, newline characters are replaced by \code{|}. -\end{datadesc} diff --git a/Doc/lib/libregex.tex b/Doc/lib/libregex.tex deleted file mode 100644 index 0982f81..0000000 --- a/Doc/lib/libregex.tex +++ /dev/null @@ -1,370 +0,0 @@ -\section{\module{regex} --- - Regular expression operations} -\declaremodule{builtin}{regex} - -\modulesynopsis{Regular expression search and match operations. - \strong{Obsolete!}} - - -This module provides regular expression matching operations similar to -those found in Emacs. - -\strong{Obsolescence note:} -This module is obsolete as of Python version 1.5; it is still being -maintained because much existing code still uses it. All new code in -need of regular expressions should use the new -\code{re}\refstmodindex{re} module, which supports the more powerful -and regular Perl-style regular expressions. Existing code should be -converted. The standard library module -\code{reconvert}\refstmodindex{reconvert} helps in converting -\code{regex} style regular expressions to \code{re}\refstmodindex{re} -style regular expressions. (For more conversion help, see Andrew -Kuchling's\index{Kuchling, Andrew} ``\module{regex-to-re} HOWTO'' at -\url{http://www.python.org/doc/howto/regex-to-re/}.) - -By default the patterns are Emacs-style regular expressions -(with one exception). There is -a way to change the syntax to match that of several well-known -\UNIX{} utilities. The exception is that Emacs' \samp{\e s} -pattern is not supported, since the original implementation references -the Emacs syntax tables. - -This module is 8-bit clean: both patterns and strings may contain null -bytes and characters whose high bit is set. - -\strong{Please note:} There is a little-known fact about Python string -literals which means that you don't usually have to worry about -doubling backslashes, even though they are used to escape special -characters in string literals as well as in regular expressions. This -is because Python doesn't remove backslashes from string literals if -they are followed by an unrecognized escape character. -\emph{However}, if you want to include a literal \dfn{backslash} in a -regular expression represented as a string literal, you have to -\emph{quadruple} it or enclose it in a singleton character class. -E.g.\ to extract \LaTeX\ \samp{\e section\{\textrm{\ldots}\}} headers -from a document, you can use this pattern: -\code{'[\e ]section\{\e (.*\e )\}'}. \emph{Another exception:} -the escape sequence \samp{\e b} is significant in string literals -(where it means the ASCII bell character) as well as in Emacs regular -expressions (where it stands for a word boundary), so in order to -search for a word boundary, you should use the pattern \code{'\e \e b'}. -Similarly, a backslash followed by a digit 0-7 should be doubled to -avoid interpretation as an octal escape. - -\subsection{Regular Expressions} - -A regular expression (or RE) specifies a set of strings that matches -it; the functions in this module let you check if a particular string -matches a given regular expression (or if a given regular expression -matches a particular string, which comes down to the same thing). - -Regular expressions can be concatenated to form new regular -expressions; if \emph{A} and \emph{B} are both regular expressions, -then \emph{AB} is also an regular expression. If a string \emph{p} -matches A and another string \emph{q} matches B, the string \emph{pq} -will match AB. Thus, complex expressions can easily be constructed -from simpler ones like the primitives described here. For details of -the theory and implementation of regular expressions, consult almost -any textbook about compiler construction. - -% XXX The reference could be made more specific, say to -% "Compilers: Principles, Techniques and Tools", by Alfred V. Aho, -% Ravi Sethi, and Jeffrey D. Ullman, or some FA text. - -A brief explanation of the format of regular expressions follows. - -Regular expressions can contain both special and ordinary characters. -Ordinary characters, like '\code{A}', '\code{a}', or '\code{0}', are -the simplest regular expressions; they simply match themselves. You -can concatenate ordinary characters, so '\code{last}' matches the -characters 'last'. (In the rest of this section, we'll write RE's in -\code{this special font}, usually without quotes, and strings to be -matched 'in single quotes'.) - -Special characters either stand for classes of ordinary characters, or -affect how the regular expressions around them are interpreted. - -The special characters are: -\begin{itemize} -\item[\code{.}] (Dot.) Matches any character except a newline. -\item[\code{\^}] (Caret.) Matches the start of the string. -\item[\code{\$}] Matches the end of the string. -\code{foo} matches both 'foo' and 'foobar', while the regular -expression '\code{foo\$}' matches only 'foo'. -\item[\code{*}] Causes the resulting RE to -match 0 or more repetitions of the preceding RE. \code{ab*} will -match 'a', 'ab', or 'a' followed by any number of 'b's. -\item[\code{+}] Causes the -resulting RE to match 1 or more repetitions of the preceding RE. -\code{ab+} will match 'a' followed by any non-zero number of 'b's; it -will not match just 'a'. -\item[\code{?}] Causes the resulting RE to -match 0 or 1 repetitions of the preceding RE. \code{ab?} will -match either 'a' or 'ab'. - -\item[\code{\e}] Either escapes special characters (permitting you to match -characters like '*?+\&\$'), or signals a special sequence; special -sequences are discussed below. Remember that Python also uses the -backslash as an escape sequence in string literals; if the escape -sequence isn't recognized by Python's parser, the backslash and -subsequent character are included in the resulting string. However, -if Python would recognize the resulting sequence, the backslash should -be repeated twice. - -\item[\code{[]}] Used to indicate a set of characters. Characters can -be listed individually, or a range is indicated by giving two -characters and separating them by a '-'. Special characters are -not active inside sets. For example, \code{[akm\$]} -will match any of the characters 'a', 'k', 'm', or '\$'; \code{[a-z]} will -match any lowercase letter. - -If you want to include a \code{]} inside a -set, it must be the first character of the set; to include a \code{-}, -place it as the first or last character. - -Characters \emph{not} within a range can be matched by including a -\code{\^} as the first character of the set; \code{\^} elsewhere will -simply match the '\code{\^}' character. -\end{itemize} - -The special sequences consist of '\code{\e}' and a character -from the list below. If the ordinary character is not on the list, -then the resulting RE will match the second character. For example, -\code{\e\$} matches the character '\$'. Ones where the backslash -should be doubled in string literals are indicated. - -\begin{itemize} -\item[\code{\e|}]\code{A\e|B}, where A and B can be arbitrary REs, -creates a regular expression that will match either A or B. This can -be used inside groups (see below) as well. -% -\item[\code{\e( \e)}] Indicates the start and end of a group; the -contents of a group can be matched later in the string with the -\code{\e [1-9]} special sequence, described next. -\end{itemize} - -\begin{fulllineitems} -\item[\code{\e \e 1, ... \e \e 7, \e 8, \e 9}] -Matches the contents of the group of the same -number. For example, \code{\e (.+\e ) \e \e 1} matches 'the the' or -'55 55', but not 'the end' (note the space after the group). This -special sequence can only be used to match one of the first 9 groups; -groups with higher numbers can be matched using the \code{\e v} -sequence. (\code{\e 8} and \code{\e 9} don't need a double backslash -because they are not octal digits.) -\end{fulllineitems} - -\begin{itemize} -\item[\code{\e \e b}] Matches the empty string, but only at the -beginning or end of a word. A word is defined as a sequence of -alphanumeric characters, so the end of a word is indicated by -whitespace or a non-alphanumeric character. -% -\item[\code{\e B}] Matches the empty string, but when it is \emph{not} at the -beginning or end of a word. -% -\item[\code{\e v}] Must be followed by a two digit decimal number, and -matches the contents of the group of the same number. The group -number must be between 1 and 99, inclusive. -% -\item[\code{\e w}]Matches any alphanumeric character; this is -equivalent to the set \code{[a-zA-Z0-9]}. -% -\item[\code{\e W}] Matches any non-alphanumeric character; this is -equivalent to the set \code{[\^a-zA-Z0-9]}. -\item[\code{\e <}] Matches the empty string, but only at the beginning of a -word. A word is defined as a sequence of alphanumeric characters, so -the end of a word is indicated by whitespace or a non-alphanumeric -character. -\item[\code{\e >}] Matches the empty string, but only at the end of a -word. - -\item[\code{\e \e \e \e}] Matches a literal backslash. - -% In Emacs, the following two are start of buffer/end of buffer. In -% Python they seem to be synonyms for ^$. -\item[\code{\e `}] Like \code{\^}, this only matches at the start of the -string. -\item[\code{\e \e '}] Like \code{\$}, this only matches at the end of -the string. -% end of buffer -\end{itemize} - -\subsection{Module Contents} -\nodename{Contents of Module regex} - -The module defines these functions, and an exception: - - -\begin{funcdesc}{match}{pattern, string} - Return how many characters at the beginning of \var{string} match - the regular expression \var{pattern}. Return \code{-1} if the - string does not match the pattern (this is different from a - zero-length match!). -\end{funcdesc} - -\begin{funcdesc}{search}{pattern, string} - Return the first position in \var{string} that matches the regular - expression \var{pattern}. Return \code{-1} if no position in the string - matches the pattern (this is different from a zero-length match - anywhere!). -\end{funcdesc} - -\begin{funcdesc}{compile}{pattern\optional{, translate}} - Compile a regular expression pattern into a regular expression - object, which can be used for matching using its \code{match()} and - \code{search()} methods, described below. The optional argument - \var{translate}, if present, must be a 256-character string - indicating how characters (both of the pattern and of the strings to - be matched) are translated before comparing them; the \var{i}-th - element of the string gives the translation for the character with - \ASCII{} code \var{i}. This can be used to implement - case-insensitive matching; see the \code{casefold} data item below. - - The sequence - -\begin{verbatim} -prog = regex.compile(pat) -result = prog.match(str) -\end{verbatim} -% -is equivalent to - -\begin{verbatim} -result = regex.match(pat, str) -\end{verbatim} - -but the version using \code{compile()} is more efficient when multiple -regular expressions are used concurrently in a single program. (The -compiled version of the last pattern passed to \code{regex.match()} or -\code{regex.search()} is cached, so programs that use only a single -regular expression at a time needn't worry about compiling regular -expressions.) -\end{funcdesc} - -\begin{funcdesc}{set_syntax}{flags} - Set the syntax to be used by future calls to \code{compile()}, - \code{match()} and \code{search()}. (Already compiled expression - objects are not affected.) The argument is an integer which is the - OR of several flag bits. The return value is the previous value of - the syntax flags. Names for the flags are defined in the standard - module \code{regex_syntax}\refstmodindex{regex_syntax}; read the - file \file{regex_syntax.py} for more information. -\end{funcdesc} - -\begin{funcdesc}{get_syntax}{} - Returns the current value of the syntax flags as an integer. -\end{funcdesc} - -\begin{funcdesc}{symcomp}{pattern\optional{, translate}} -This is like \code{compile()}, but supports symbolic group names: if a -parenthesis-enclosed group begins with a group name in angular -brackets, e.g. \code{'\e([a-z][a-z0-9]*\e)'}, the group can -be referenced by its name in arguments to the \code{group()} method of -the resulting compiled regular expression object, like this: -\code{p.group('id')}. Group names may contain alphanumeric characters -and \code{'_'} only. -\end{funcdesc} - -\begin{excdesc}{error} - Exception raised when a string passed to one of the functions here - is not a valid regular expression (e.g., unmatched parentheses) or - when some other error occurs during compilation or matching. (It is - never an error if a string contains no match for a pattern.) -\end{excdesc} - -\begin{datadesc}{casefold} -A string suitable to pass as the \var{translate} argument to -\code{compile()} to map all upper case characters to their lowercase -equivalents. -\end{datadesc} - -\noindent -Compiled regular expression objects support these methods: - -\setindexsubitem{(regex method)} -\begin{funcdesc}{match}{string\optional{, pos}} - Return how many characters at the beginning of \var{string} match - the compiled regular expression. Return \code{-1} if the string - does not match the pattern (this is different from a zero-length - match!). - - The optional second parameter, \var{pos}, gives an index in the string - where the search is to start; it defaults to \code{0}. This is not - completely equivalent to slicing the string; the \code{'\^'} pattern - character matches at the real beginning of the string and at positions - just after a newline, not necessarily at the index where the search - is to start. -\end{funcdesc} - -\begin{funcdesc}{search}{string\optional{, pos}} - Return the first position in \var{string} that matches the regular - expression \code{pattern}. Return \code{-1} if no position in the - string matches the pattern (this is different from a zero-length - match anywhere!). - - The optional second parameter has the same meaning as for the - \code{match()} method. -\end{funcdesc} - -\begin{funcdesc}{group}{index, index, ...} -This method is only valid when the last call to the \code{match()} -or \code{search()} method found a match. It returns one or more -groups of the match. If there is a single \var{index} argument, -the result is a single string; if there are multiple arguments, the -result is a tuple with one item per argument. If the \var{index} is -zero, the corresponding return value is the entire matching string; if -it is in the inclusive range [1..99], it is the string matching the -corresponding parenthesized group (using the default syntax, -groups are parenthesized using \code{{\e}(} and \code{{\e})}). If no -such group exists, the corresponding result is \code{None}. - -If the regular expression was compiled by \code{symcomp()} instead of -\code{compile()}, the \var{index} arguments may also be strings -identifying groups by their group name. -\end{funcdesc} - -\noindent -Compiled regular expressions support these data attributes: - -\setindexsubitem{(regex attribute)} - -\begin{datadesc}{regs} -When the last call to the \code{match()} or \code{search()} method found a -match, this is a tuple of pairs of indexes corresponding to the -beginning and end of all parenthesized groups in the pattern. Indices -are relative to the string argument passed to \code{match()} or -\code{search()}. The 0-th tuple gives the beginning and end or the -whole pattern. When the last match or search failed, this is -\code{None}. -\end{datadesc} - -\begin{datadesc}{last} -When the last call to the \code{match()} or \code{search()} method found a -match, this is the string argument passed to that method. When the -last match or search failed, this is \code{None}. -\end{datadesc} - -\begin{datadesc}{translate} -This is the value of the \var{translate} argument to -\code{regex.compile()} that created this regular expression object. If -the \var{translate} argument was omitted in the \code{regex.compile()} -call, this is \code{None}. -\end{datadesc} - -\begin{datadesc}{givenpat} -The regular expression pattern as passed to \code{compile()} or -\code{symcomp()}. -\end{datadesc} - -\begin{datadesc}{realpat} -The regular expression after stripping the group names for regular -expressions compiled with \code{symcomp()}. Same as \code{givenpat} -otherwise. -\end{datadesc} - -\begin{datadesc}{groupindex} -A dictionary giving the mapping from symbolic group names to numerical -group indexes for regular expressions compiled with \code{symcomp()}. -\code{None} otherwise. -\end{datadesc} diff --git a/Doc/lib/libregsub.tex b/Doc/lib/libregsub.tex deleted file mode 100644 index b41b700..0000000 --- a/Doc/lib/libregsub.tex +++ /dev/null @@ -1,74 +0,0 @@ -\section{\module{regsub} --- - String operations using regular expressions} - -\declaremodule{standard}{regsub} -\modulesynopsis{Substitution and splitting operations that use - regular expressions. \strong{Obsolete!}} - - -This module defines a number of functions useful for working with -regular expressions (see built-in module \refmodule{regex}). - -Warning: these functions are not thread-safe. - -\strong{Obsolescence note:} -This module is obsolete as of Python version 1.5; it is still being -maintained because much existing code still uses it. All new code in -need of regular expressions should use the new \refmodule{re} module, which -supports the more powerful and regular Perl-style regular expressions. -Existing code should be converted. The standard library module -\module{reconvert} helps in converting \refmodule{regex} style regular -expressions to \refmodule{re} style regular expressions. (For more -conversion help, see Andrew Kuchling's\index{Kuchling, Andrew} -``regex-to-re HOWTO'' at -\url{http://www.python.org/doc/howto/regex-to-re/}.) - - -\begin{funcdesc}{sub}{pat, repl, str} -Replace the first occurrence of pattern \var{pat} in string -\var{str} by replacement \var{repl}. If the pattern isn't found, -the string is returned unchanged. The pattern may be a string or an -already compiled pattern. The replacement may contain references -\samp{\e \var{digit}} to subpatterns and escaped backslashes. -\end{funcdesc} - -\begin{funcdesc}{gsub}{pat, repl, str} -Replace all (non-overlapping) occurrences of pattern \var{pat} in -string \var{str} by replacement \var{repl}. The same rules as for -\code{sub()} apply. Empty matches for the pattern are replaced only -when not adjacent to a previous match, so e.g. -\code{gsub('', '-', 'abc')} returns \code{'-a-b-c-'}. -\end{funcdesc} - -\begin{funcdesc}{split}{str, pat\optional{, maxsplit}} -Split the string \var{str} in fields separated by delimiters matching -the pattern \var{pat}, and return a list containing the fields. Only -non-empty matches for the pattern are considered, so e.g. -\code{split('a:b', ':*')} returns \code{['a', 'b']} and -\code{split('abc', '')} returns \code{['abc']}. The \var{maxsplit} -defaults to 0. If it is nonzero, only \var{maxsplit} number of splits -occur, and the remainder of the string is returned as the final -element of the list. -\end{funcdesc} - -\begin{funcdesc}{splitx}{str, pat\optional{, maxsplit}} -Split the string \var{str} in fields separated by delimiters matching -the pattern \var{pat}, and return a list containing the fields as well -as the separators. For example, \code{splitx('a:::b', ':*')} returns -\code{['a', ':::', 'b']}. Otherwise, this function behaves the same -as \code{split}. -\end{funcdesc} - -\begin{funcdesc}{capwords}{s\optional{, pat}} -Capitalize words separated by optional pattern \var{pat}. The default -pattern uses any characters except letters, digits and underscores as -word delimiters. Capitalization is done by changing the first -character of each word to upper case. -\end{funcdesc} - -\begin{funcdesc}{clear_cache}{} -The regsub module maintains a cache of compiled regular expressions, -keyed on the regular expression string and the syntax of the regex -module at the time the expression was compiled. This function clears -that cache. -\end{funcdesc} diff --git a/Doc/lib/librunpy.tex b/Doc/lib/librunpy.tex new file mode 100644 index 0000000..4be9901 --- /dev/null +++ b/Doc/lib/librunpy.tex @@ -0,0 +1,74 @@ +\section{\module{runpy} --- + Locating and executing Python modules.} + +\declaremodule{standard}{runpy} % standard library, in Python + +\moduleauthor{Nick Coghlan}{ncoghlan@gmail.com} + +\modulesynopsis{Locate and execute Python modules as scripts} + +\versionadded{2.5} + +The \module{runpy} module is used to locate and run Python modules +without importing them first. It's main use is to implement the +\programopt{-m} command line switch that allows scripts to be located +using the Python module namespace rather than the filesystem. + +When executed as a script, the module effectively operates as follows: +\begin{verbatim} + del sys.argv[0] # Remove the runpy module from the arguments + run_module(sys.argv[0], run_name="__main__", alter_sys=True) +\end{verbatim} + +The \module{runpy} module provides a single function: + +\begin{funcdesc}{run_module}{mod_name\optional{, init_globals} +\optional{, run_name}\optional{, alter_sys}} +Execute the code of the specified module and return the resulting +module globals dictionary. The module's code is first located using +the standard import mechanism (refer to PEP 302 for details) and +then executed in a fresh module namespace. + +The optional dictionary argument \var{init_globals} may be used to +pre-populate the globals dictionary before the code is executed. +The supplied dictionary will not be modified. If any of the special +global variables below are defined in the supplied dictionary, those +definitions are overridden by the \code{run_module} function. + +The special global variables \code{__name__}, \code{__file__}, +\code{__loader__} and \code{__builtins__} are set in the globals +dictionary before the module code is executed. + +\code{__name__} is set to \var{run_name} if this optional argument is +supplied, and the \var{mod_name} argument otherwise. + +\code{__loader__} is set to the PEP 302 module loader used to retrieve +the code for the module (This loader may be a wrapper around the +standard import mechanism). + +\code{__file__} is set to the name provided by the module loader. If +the loader does not make filename information available, this +variable is set to \code{None}. + +\code{__builtins__} is automatically initialised with a reference to +the top level namespace of the \module{__builtin__} module. + +If the argument \var{alter_sys} is supplied and evaluates to +\code{True}, then \code{sys.argv[0]} is updated with the value of +\code{__file__} and \code{sys.modules[__name__]} is updated with a +temporary module object for the module being executed. Both +\code{sys.argv[0]} and \code{sys.modules[__name__]} are restored to +their original values before the function returns. + +Note that this manipulation of \module{sys} is not thread-safe. Other +threads may see the partially initialised module, as well as the +altered list of arguments. It is recommended that the \module{sys} +module be left alone when invoking this function from threaded code. +\end{funcdesc} + +\begin{seealso} + +\seepep{338}{Executing modules as scripts}{PEP written and +implemented by Nick Coghlan.} + +\end{seealso} diff --git a/Doc/lib/libsets.tex b/Doc/lib/libsets.tex index dd85ec7..22bf34b 100644 --- a/Doc/lib/libsets.tex +++ b/Doc/lib/libsets.tex @@ -151,12 +151,13 @@ but not found in \class{ImmutableSet}: \lineiii{\var{s}.add(\var{x})}{} {add element \var{x} to set \var{s}} \lineiii{\var{s}.remove(\var{x})}{} - {remove \var{x} from set \var{s}; raises KeyError if not present} + {remove \var{x} from set \var{s}; raises \exception{KeyError} + if not present} \lineiii{\var{s}.discard(\var{x})}{} {removes \var{x} from set \var{s} if present} \lineiii{\var{s}.pop()}{} {remove and return an arbitrary element from \var{s}; raises - KeyError if empty} + \exception{KeyError} if empty} \lineiii{\var{s}.clear()}{} {remove all elements from set \var{s}} \end{tableiii} diff --git a/Doc/lib/libsgmllib.tex b/Doc/lib/libsgmllib.tex index 27bf0b0..1578313 100644 --- a/Doc/lib/libsgmllib.tex +++ b/Doc/lib/libsgmllib.tex @@ -95,12 +95,22 @@ lower case, and the \var{method} argument is the bound method which should be used to support semantic interpretation of the start tag. The \var{attributes} argument is a list of \code{(\var{name}, \var{value})} pairs containing the attributes found inside the tag's -\code{<>} brackets. The \var{name} has been translated to lower case -and double quotes and backslashes in the \var{value} have been interpreted. +\code{<>} brackets. + +The \var{name} has been translated to lower case. +Double quotes and backslashes in the \var{value} have been interpreted, +as well as known character references and known entity references +terminated by a semicolon (normally, entity references can be terminated +by any non-alphanumerical character, but this would break the very +common case of \code{} when \code{eggs} +is a valid entity name). + For instance, for the tag \code{}, this method would be called as \samp{unknown_starttag('a', [('href', 'http://www.cwi.nl/')])}. The base implementation simply calls \var{method} with \var{attributes} as the only argument. +\versionadded[Handling of entity and character references within + attribute values]{2.5} \end{methoddesc} \begin{methoddesc}{handle_endtag}{tag, method} diff --git a/Doc/lib/libshutil.tex b/Doc/lib/libshutil.tex index a217150..449d741 100644 --- a/Doc/lib/libshutil.tex +++ b/Doc/lib/libshutil.tex @@ -73,18 +73,18 @@ file type and creator codes will not be correct. If \var{symlinks} is true, symbolic links in the source tree are represented as symbolic links in the new tree; if false or omitted, the contents of the linked files are copied to - the new tree. If exception(s) occur, an Error is raised + the new tree. If exception(s) occur, an \exception{Error} is raised with a list of reasons. The source code for this should be considered an example rather than a tool. - \versionchanged[Error is raised if any exceptions occur during copying, - rather than printing a message]{2.3} + \versionchanged[\exception{Error} is raised if any exceptions occur during + copying, rather than printing a message]{2.3} \versionchanged[Create intermediate directories needed to create \var{dst}, - rather than raising an error. Copy permissions and times of directories using - \function{copystat()}]{2.5} + rather than raising an error. Copy permissions and times of + directories using \function{copystat()}]{2.5} \end{funcdesc} diff --git a/Doc/lib/libsignal.tex b/Doc/lib/libsignal.tex index f168b6d..cfdb4dd 100644 --- a/Doc/lib/libsignal.tex +++ b/Doc/lib/libsignal.tex @@ -100,7 +100,7 @@ The \module{signal} module defines the following functions: Any previously scheduled alarm is canceled (only one alarm can be scheduled at any time). The returned value is then the number of seconds before any previously set alarm was to have been delivered. - If \var{time} is zero, no alarm id scheduled, and any scheduled + If \var{time} is zero, no alarm is scheduled, and any scheduled alarm is canceled. The return value is the number of seconds remaining before a previously scheduled alarm. If the return value is zero, no alarm is currently scheduled. (See the \UNIX{} man page diff --git a/Doc/lib/libsocket.tex b/Doc/lib/libsocket.tex index cc7bd75..c7b656d 100644 --- a/Doc/lib/libsocket.tex +++ b/Doc/lib/libsocket.tex @@ -317,10 +317,11 @@ Availability: \UNIX. \versionadded{2.4} \end{funcdesc} \begin{funcdesc}{fromfd}{fd, family, type\optional{, proto}} -Build a socket object from an existing file descriptor (an integer as -returned by a file object's \method{fileno()} method). Address family, -socket type and protocol number are as for the \function{socket()} function -above. The file descriptor should refer to a socket, but this is not +Duplicate the file descriptor \var{fd} (an integer as returned by a file +object's \method{fileno()} method) and build a socket object from the +result. Address family, socket type and protocol number are as for the +\function{socket()} function above. +The file descriptor should refer to a socket, but this is not checked --- subsequent operations on the object may fail if the file descriptor is invalid. This function is rarely needed, but can be used to get or set socket options on a socket passed to a program as @@ -626,7 +627,7 @@ timeouts on socket operations. \end{methoddesc} \begin{methoddesc}[socket]{gettimeout}{} -Returns the timeout in floating seconds associated with socket +Return the timeout in floating seconds associated with socket operations, or \code{None} if no timeout is set. This reflects the last call to \method{setblocking()} or \method{settimeout()}. \versionadded{2.3} @@ -677,6 +678,25 @@ use \method{recv()} and \method{send()} without \var{flags} argument instead. +Socket objects also have these (read-only) attributes that correspond +to the values given to the \class{socket} constructor. + +\begin{memberdesc}[socket]{family} +The socket family. +\versionadded{2.5} +\end{memberdesc} + +\begin{memberdesc}[socket]{type} +The socket type. +\versionadded{2.5} +\end{memberdesc} + +\begin{memberdesc}[socket]{proto} +The socket protocol. +\versionadded{2.5} +\end{memberdesc} + + \subsection{SSL Objects \label{ssl-objects}} SSL objects have the following methods. diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex index d2a0425..8d011fd 100644 --- a/Doc/lib/libstdtypes.tex +++ b/Doc/lib/libstdtypes.tex @@ -185,10 +185,12 @@ There are four distinct numeric types: \dfn{plain integers}, In addition, Booleans are a subtype of plain integers. Plain integers (also just called \dfn{integers}) are implemented using \ctype{long} in C, which gives them at least 32 -bits of precision. Long integers have unlimited precision. Floating -point numbers are implemented using \ctype{double} in C. All bets on -their precision are off unless you happen to know the machine you are -working with. +bits of precision (\code{sys.maxint} is always set to the maximum +plain integer value for the current platform, the minimum value is +\code{-sys.maxint - 1}). Long integers have unlimited precision. +Floating point numbers are implemented using \ctype{double} in C. +All bets on their precision are off unless you happen to know the +machine you are working with. \obindex{numeric} \obindex{Boolean} \obindex{integer} @@ -249,6 +251,7 @@ comparison operations): \hline \lineiii{\var{x} * \var{y}}{product of \var{x} and \var{y}}{} \lineiii{\var{x} / \var{y}}{quotient of \var{x} and \var{y}}{(1)} + \lineiii{\var{x} // \var{y}}{(floored) quotient of \var{x} and \var{y}}{(5)} \lineiii{\var{x} \%{} \var{y}}{remainder of \code{\var{x} / \var{y}}}{(4)} \hline \lineiii{-\var{x}}{\var{x} negated}{} @@ -299,6 +302,9 @@ Complex floor division operator, modulo operator, and \function{divmod()}. \deprecated{2.3}{Instead convert to float using \function{abs()} if appropriate.} +\item[(5)] +Also referred to as integer division. The resultant value is a whole integer, +though the result's type is not necessarily int. \end{description} % XXXJH exceptions: overflow (when? what operations?) zerodivision @@ -1278,7 +1284,8 @@ that do not apply to immutable instances of \class{frozenset}: \lineiii{\var{s}.add(\var{x})}{} {add element \var{x} to set \var{s}} \lineiii{\var{s}.remove(\var{x})}{} - {remove \var{x} from set \var{s}; raises KeyError if not present} + {remove \var{x} from set \var{s}; raises \exception{KeyError} + if not present} \lineiii{\var{s}.discard(\var{x})}{} {removes \var{x} from set \var{s} if present} \lineiii{\var{s}.pop()}{} @@ -1495,6 +1502,38 @@ Files have the following methods: Any operation which requires that the file be open will raise a \exception{ValueError} after the file has been closed. Calling \method{close()} more than once is allowed. + + As of Python 2.5, you can avoid having to call this method explicitly + if you use the \keyword{with} statement. For example, the following + code will automatically close \code{f} when the \keyword{with} block + is exited: + +\begin{verbatim} +from __future__ import with_statement + +with open("hello.txt") as f: + for line in f: + print line +\end{verbatim} + + In older versions of Python, you would have needed to do this to get + the same effect: + +\begin{verbatim} +f = open("hello.txt") +try: + for line in f: + print line +finally: + f.close() +\end{verbatim} + + \note{Not all ``file-like'' types in Python support use as a context + manager for the \keyword{with} statement. If your code is intended to + work with any file-like object, you can use the \function{closing()} + function in the \module{contextlib} module instead of using the object + directly. See section~\ref{context-closing} for details.} + \end{methoddesc} \begin{methoddesc}[file]{flush}{} @@ -1783,14 +1822,14 @@ class, respectively. When a method is unbound, its \code{im_self} attribute will be \code{None} and if called, an explicit \code{self} object must be passed as the first argument. In this case, \code{self} must be an instance of the unbound method's class (or a -subclass of that class), otherwise a \code{TypeError} is raised. +subclass of that class), otherwise a \exception{TypeError} is raised. Like function objects, methods objects support getting arbitrary attributes. However, since method attributes are actually stored on the underlying function object (\code{meth.im_func}), setting method attributes on either bound or unbound methods is disallowed. Attempting to set a method attribute results in a -\code{TypeError} being raised. In order to set a method attribute, +\exception{TypeError} being raised. In order to set a method attribute, you need to explicitly set it on the underlying function object: \begin{verbatim} diff --git a/Doc/lib/libsubprocess.tex b/Doc/lib/libsubprocess.tex index f48b29b..4417797 100644 --- a/Doc/lib/libsubprocess.tex +++ b/Doc/lib/libsubprocess.tex @@ -135,8 +135,8 @@ The arguments are the same as for the Popen constructor. Example: \begin{funcdesc}{check_call}{*popenargs, **kwargs} Run command with arguments. Wait for command to complete. If the exit -code was zero then return, otherwise raise CalledProcessError. The -CalledProcessError object will have the return code in the +code was zero then return, otherwise raise \exception{CalledProcessError.} +The \exception{CalledProcessError} object will have the return code in the \member{errno} attribute. The arguments are the same as for the Popen constructor. Example: diff --git a/Doc/lib/libthread.tex b/Doc/lib/libthread.tex index 4914948d..9e0c202 100644 --- a/Doc/lib/libthread.tex +++ b/Doc/lib/libthread.tex @@ -100,6 +100,19 @@ Return the status of the lock:\ \code{True} if it has been acquired by some thread, \code{False} if not. \end{methoddesc} +In addition to these methods, lock objects can also be used via the +\keyword{with} statement, e.g.: + +\begin{verbatim} +from __future__ import with_statement +import thread + +a_lock = thread.allocate_lock() + +with a_lock: + print "a_lock is locked while this executes" +\end{verbatim} + \strong{Caveats:} \begin{itemize} diff --git a/Doc/lib/libthreading.tex b/Doc/lib/libthreading.tex index 33839a4..8fb3137 100644 --- a/Doc/lib/libthreading.tex +++ b/Doc/lib/libthreading.tex @@ -675,3 +675,26 @@ keyword arguments \var{kwargs}, after \var{interval} seconds have passed. Stop the timer, and cancel the execution of the timer's action. This will only work if the timer is still in its waiting stage. \end{methoddesc} + +\subsection{Using locks, conditions, and semaphores in the \keyword{with} +statement \label{with-locks}} + +All of the objects provided by this module that have \method{acquire()} and +\method{release()} methods can be used as context managers for a \keyword{with} +statement. The \method{acquire()} method will be called when the block is +entered, and \method{release()} will be called when the block is exited. + +Currently, \class{Lock}, \class{RLock}, \class{Condition}, \class{Semaphore}, +and \class{BoundedSemaphore} objects may be used as \keyword{with} +statement context managers. For example: + +\begin{verbatim} +from __future__ import with_statement +import threading + +some_rlock = threading.RLock() + +with some_rlock: + print "some_rlock is locked while this executes" +\end{verbatim} + diff --git a/Doc/lib/libundoc.tex b/Doc/lib/libundoc.tex index 6cef183..df78152 100644 --- a/Doc/lib/libundoc.tex +++ b/Doc/lib/libundoc.tex @@ -42,15 +42,15 @@ document these. \begin{description} \item[\module{ntpath}] --- Implementation of \module{os.path} on Win32, Win64, WinCE, and -OS/2 platforms. + OS/2 platforms. \item[\module{posixpath}] --- Implementation of \module{os.path} on \POSIX. \item[\module{bsddb185}] --- Backwards compatibility module for systems which still use the Berkeley -DB 1.85 module. It is normally only available on certain BSD Unix-based -systems. It should never be used directly. + DB 1.85 module. It is normally only available on certain BSD Unix-based + systems. It should never be used directly. \end{description} @@ -62,14 +62,14 @@ systems. It should never be used directly. \item[\module{linuxaudiodev}] --- Play audio data on the Linux audio device. Replaced in Python 2.3 -by the \module{ossaudiodev} module. + by the \module{ossaudiodev} module. \item[\module{sunaudio}] --- Interpret Sun audio headers (may become obsolete or a tool/demo). \item[\module{toaiff}] --- Convert "arbitrary" sound files to AIFF files; should probably -become a tool or demo. Requires the external program \program{sox}. + become a tool or demo. Requires the external program \program{sox}. \end{description} @@ -78,12 +78,13 @@ become a tool or demo. Requires the external program \program{sox}. These modules are not normally available for import; additional work must be done to make them available. -Those which are written in Python will be installed into the directory -\file{lib-old/} installed as part of the standard library. To use -these, the directory must be added to \code{sys.path}, possibly using -\envvar{PYTHONPATH}. +%%% lib-old is empty as of Python 2.5 +% Those which are written in Python will be installed into the directory +% \file{lib-old/} installed as part of the standard library. To use +% these, the directory must be added to \code{sys.path}, possibly using +% \envvar{PYTHONPATH}. -Obsolete extension modules written in C are not built by default. +These extension modules written in C are not built by default. Under \UNIX, these must be enabled by uncommenting the appropriate lines in \file{Modules/Setup} in the build tree and either rebuilding Python if the modules are statically linked, or building and @@ -92,122 +93,11 @@ installing the shared object if using dynamically-loaded extensions. % XXX need Windows instructions! \begin{description} -\item[\module{addpack}] ---- Alternate approach to packages. Use the built-in package support -instead. - -\item[\module{cmp}] ---- File comparison function. Use the newer \refmodule{filecmp} instead. - -\item[\module{cmpcache}] ---- Caching version of the obsolete \module{cmp} module. Use the -newer \refmodule{filecmp} instead. - -\item[\module{codehack}] ---- Extract function name or line number from a function -code object (these are now accessible as attributes: -\member{co.co_name}, \member{func.func_name}, -\member{co.co_firstlineno}). - -\item[\module{dircmp}] ---- Class to build directory diff tools on (may become a demo or tool). -\deprecated{2.0}{The \refmodule{filecmp} module replaces -\module{dircmp}.} - -\item[\module{dump}] ---- Print python code that reconstructs a variable. - -\item[\module{fmt}] ---- Text formatting abstractions (too slow). - -\item[\module{lockfile}] ---- Wrapper around FCNTL file locking (use -\function{fcntl.lockf()}/\function{flock()} instead; see \refmodule{fcntl}). - -\item[\module{newdir}] ---- New \function{dir()} function (the standard \function{dir()} is -now just as good). - -\item[\module{Para}] ---- Helper for \module{fmt}. - -\item[\module{poly}] ---- Polynomials. - -\item[\module{rand}] ---- Old interface to the random number generator. - -\item[\module{regex}] ---- Emacs-style regular expression support; may still be used in some -old code (extension module). Refer to the -\citetitle[http://www.python.org/doc/1.6/lib/module-regex.html]{Python -1.6 Documentation} for documentation. - -\item[\module{regsub}] ---- Regular expression based string replacement utilities, for use -with \module{regex} (extension module). Refer to the -\citetitle[http://www.python.org/doc/1.6/lib/module-regsub.html]{Python -1.6 Documentation} for documentation. - -\item[\module{statcache}] ---- Caches the results of os.stat(). Using the cache can be fragile -and error-prone, just use \code{os.stat()} directly. - -\item[\module{tb}] ---- Print tracebacks, with a dump of local variables (use -\function{pdb.pm()} or \refmodule{traceback} instead). - \item[\module{timing}] ---- Measure time intervals to high resolution (use -\function{time.clock()} instead). (This is an extension module.) - -\item[\module{tzparse}] ---- Parse a timezone specification (unfinished; may disappear in the -future, and does not work when the \envvar{TZ} environment variable is -not set). - -\item[\module{util}] ---- Useful functions that don't fit elsewhere. - -\item[\module{whatsound}] ---- Recognize sound files; use \refmodule{sndhdr} instead. - -\item[\module{whrandom}] ---- Old random number generator. Use \module{random} instead. - -\item[\module{zmod}] ---- Compute properties of mathematical ``fields.'' +--- Measure time intervals to high resolution (use \function{time.clock()} + instead). \end{description} - -The following modules are obsolete, but are likely to re-surface as -tools or scripts: - -\begin{description} -\item[\module{find}] ---- Find files matching pattern in directory tree. - -\item[\module{grep}] ---- \program{grep} implementation in Python. - -\item[\module{packmail}] ---- Create a self-unpacking \UNIX{} shell archive. -\end{description} - - -The following modules were documented in previous versions of this -manual, but are now considered obsolete. The source for the -documentation is still available as part of the documentation source -archive. - -\begin{description} -\item[\module{ni}] ---- Import modules in ``packages.'' Basic package support is now -built in. The built-in support is very similar to what is provided in -this module. -\end{description} - - \section{SGI-specific Extension modules} The following are SGI specific, and may be out of touch with the @@ -219,5 +109,5 @@ current version of reality. \item[\module{sv}] --- Interface to the ``simple video'' board on SGI Indigo -(obsolete hardware). + (obsolete hardware). \end{description} diff --git a/Doc/lib/liburllib2.tex b/Doc/lib/liburllib2.tex index 706c54b..e0c4568 100644 --- a/Doc/lib/liburllib2.tex +++ b/Doc/lib/liburllib2.tex @@ -384,7 +384,7 @@ determined by sorting the handler instances. \method{\var{protocol}_open()} are called to handle the request. This stage ends when a handler either returns a non-\constant{None} value (ie. a response), or raises an exception - (usually URLError). Exceptions are allowed to propagate. + (usually \exception{URLError}). Exceptions are allowed to propagate. In fact, the above algorithm is first tried for methods named \method{default_open}. If all such methods return diff --git a/Doc/lib/liburlparse.tex b/Doc/lib/liburlparse.tex index 8f80d6b..f18efe9 100644 --- a/Doc/lib/liburlparse.tex +++ b/Doc/lib/liburlparse.tex @@ -23,50 +23,76 @@ draft!). It supports the following URL schemes: \code{file}, \code{ftp}, \code{gopher}, \code{hdl}, \code{http}, \code{https}, \code{imap}, \code{mailto}, \code{mms}, \code{news}, \code{nntp}, \code{prospero}, \code{rsync}, \code{rtsp}, \code{rtspu}, -\code{sftp}, \code{shttp}, \code{sip}, \code{snews}, \code{svn}, +\code{sftp}, \code{shttp}, \code{sip}, \code{sips}, \code{snews}, \code{svn}, \code{svn+ssh}, \code{telnet}, \code{wais}. -\versionadded[Support for the \code{sftp} scheme]{2.5} + +\versionadded[Support for the \code{sftp} and \code{sips} schemes]{2.5} The \module{urlparse} module defines the following functions: -\begin{funcdesc}{urlparse}{urlstring\optional{, default_scheme\optional{, allow_fragments}}} -Parse a URL into 6 components, returning a 6-tuple: (addressing -scheme, network location, path, parameters, query, fragment -identifier). This corresponds to the general structure of a URL: +\begin{funcdesc}{urlparse}{urlstring\optional{, + default_scheme\optional{, allow_fragments}}} +Parse a URL into six components, returning a 6-tuple. This +corresponds to the general structure of a URL: \code{\var{scheme}://\var{netloc}/\var{path};\var{parameters}?\var{query}\#\var{fragment}}. Each tuple item is a string, possibly empty. -The components are not broken up in smaller parts (e.g. the network +The components are not broken up in smaller parts (for example, the network location is a single string), and \% escapes are not expanded. -The delimiters as shown above are not part of the tuple items, +The delimiters as shown above are not part of the result, except for a leading slash in the \var{path} component, which is -retained if present. - -Example: - -\begin{verbatim} -urlparse('http://www.cwi.nl:80/%7Eguido/Python.html') -\end{verbatim} - -yields the tuple +retained if present. For example: \begin{verbatim} +>>> from urlparse import urlparse +>>> o = urlparse('http://www.cwi.nl:80/%7Eguido/Python.html') +>>> o ('http', 'www.cwi.nl:80', '/%7Eguido/Python.html', '', '', '') +>>> o.scheme +'http' +>>> o.port +80 +>>> o.geturl() +'http://www.cwi.nl:80/%7Eguido/Python.html' \end{verbatim} If the \var{default_scheme} argument is specified, it gives the -default addressing scheme, to be used only if the URL string does not +default addressing scheme, to be used only if the URL does not specify one. The default value for this argument is the empty string. -If the \var{allow_fragments} argument is zero, fragment identifiers +If the \var{allow_fragments} argument is false, fragment identifiers are not allowed, even if the URL's addressing scheme normally does -support them. The default value for this argument is \code{1}. +support them. The default value for this argument is \constant{True}. + +The return value is actually an instance of a subclass of +\pytype{tuple}. This class has the following additional read-only +convenience attributes: + +\begin{tableiv}{l|c|l|c}{member}{Attribute}{Index}{Value}{Value if not present} + \lineiv{scheme} {0} {URL scheme specifier} {empty string} + \lineiv{netloc} {1} {Network location part} {empty string} + \lineiv{path} {2} {Hierarchical path} {empty string} + \lineiv{params} {3} {Parameters for last path element} {empty string} + \lineiv{query} {4} {Query component} {empty string} + \lineiv{fragment}{5} {Fragment identifier} {empty string} + \lineiv{username}{ } {User name} {\constant{None}} + \lineiv{password}{ } {Password} {\constant{None}} + \lineiv{hostname}{ } {Host name (lower case)} {\constant{None}} + \lineiv{port} { } {Port number as integer, if present} {\constant{None}} +\end{tableiv} + +See section~\ref{urlparse-result-object}, ``Results of +\function{urlparse()} and \function{urlsplit()},'' for more +information on the result object. + +\versionchanged[Added attributes to return value]{2.5} \end{funcdesc} -\begin{funcdesc}{urlunparse}{tuple} -Construct a URL string from a tuple as returned by \code{urlparse()}. +\begin{funcdesc}{urlunparse}{parts} +Construct a URL from a tuple as returned by \code{urlparse()}. +The \var{parts} argument be any six-item iterable. This may result in a slightly different, but equivalent URL, if the -URL that was parsed originally had redundant delimiters, e.g. a ? with -an empty query (the draft states that these are equivalent). +URL that was parsed originally had unnecessary delimiters (for example, +a ? with an empty query; the RFC states that these are equivalent). \end{funcdesc} \begin{funcdesc}{urlsplit}{urlstring\optional{, @@ -79,12 +105,38 @@ the URL (see \rfc{2396}) is wanted. A separate function is needed to separate the path segments and parameters. This function returns a 5-tuple: (addressing scheme, network location, path, query, fragment identifier). + +The return value is actually an instance of a subclass of +\pytype{tuple}. This class has the following additional read-only +convenience attributes: + +\begin{tableiv}{l|c|l|c}{member}{Attribute}{Index}{Value}{Value if not present} + \lineiv{scheme} {0} {URL scheme specifier} {empty string} + \lineiv{netloc} {1} {Network location part} {empty string} + \lineiv{path} {2} {Hierarchical path} {empty string} + \lineiv{query} {3} {Query component} {empty string} + \lineiv{fragment} {4} {Fragment identifier} {empty string} + \lineiv{username} { } {User name} {\constant{None}} + \lineiv{password} { } {Password} {\constant{None}} + \lineiv{hostname} { } {Host name (lower case)} {\constant{None}} + \lineiv{port} { } {Port number as integer, if present} {\constant{None}} +\end{tableiv} + +See section~\ref{urlparse-result-object}, ``Results of +\function{urlparse()} and \function{urlsplit()},'' for more +information on the result object. + \versionadded{2.2} +\versionchanged[Added attributes to return value]{2.5} \end{funcdesc} -\begin{funcdesc}{urlunsplit}{tuple} +\begin{funcdesc}{urlunsplit}{parts} Combine the elements of a tuple as returned by \function{urlsplit()} into a complete URL as a string. +The \var{parts} argument be any five-item iterable. +This may result in a slightly different, but equivalent URL, if the +URL that was parsed originally had unnecessary delimiters (for example, +a ? with an empty query; the RFC states that these are equivalent). \versionadded{2.2} \end{funcdesc} @@ -93,22 +145,16 @@ Construct a full (``absolute'') URL by combining a ``base URL'' (\var{base}) with a ``relative URL'' (\var{url}). Informally, this uses components of the base URL, in particular the addressing scheme, the network location and (part of) the path, to provide missing -components in the relative URL. - -Example: - -\begin{verbatim} -urljoin('http://www.cwi.nl/%7Eguido/Python.html', 'FAQ.html') -\end{verbatim} - -yields the string +components in the relative URL. For example: \begin{verbatim} +>>> from urlparse import urljoin +>>> urljoin('http://www.cwi.nl/%7Eguido/Python.html', 'FAQ.html') 'http://www.cwi.nl/%7Eguido/FAQ.html' \end{verbatim} -The \var{allow_fragments} argument has the same meaning as for -\code{urlparse()}. +The \var{allow_fragments} argument has the same meaning and default as +for \function{urlparse()}. \end{funcdesc} \begin{funcdesc}{urldefrag}{url} @@ -133,3 +179,61 @@ in \var{url}, returns \var{url} unmodified and an empty string. both Uniform Resource Names (URNs) and Uniform Resource Locators (URLs).} \end{seealso} + + +\subsection{Results of \function{urlparse()} and \function{urlsplit()} + \label{urlparse-result-object}} + +The result objects from the \function{urlparse()} and +\function{urlsplit()} functions are subclasses of the \pytype{tuple} +type. These subclasses add the attributes described in those +functions, as well as provide an additional method: + +\begin{methoddesc}[ParseResult]{geturl}{} + Return the re-combined version of the original URL as a string. + This may differ from the original URL in that the scheme will always + be normalized to lower case and empty components may be dropped. + Specifically, empty parameters, queries, and fragment identifiers + will be removed. + + The result of this method is a fixpoint if passed back through the + original parsing function: + +\begin{verbatim} +>>> import urlparse +>>> url = 'HTTP://www.Python.org/doc/#' + +>>> r1 = urlparse.urlsplit(url) +>>> r1.geturl() +'http://www.Python.org/doc/' + +>>> r2 = urlparse.urlsplit(r1.geturl()) +>>> r2.geturl() +'http://www.Python.org/doc/' +\end{verbatim} + +\versionadded{2.5} +\end{methoddesc} + +The following classes provide the implementations of the parse results:: + +\begin{classdesc*}{BaseResult} + Base class for the concrete result classes. This provides most of + the attribute definitions. It does not provide a \method{geturl()} + method. It is derived from \class{tuple}, but does not override the + \method{__init__()} or \method{__new__()} methods. +\end{classdesc*} + + +\begin{classdesc}{ParseResult}{scheme, netloc, path, params, query, fragment} + Concrete class for \function{urlparse()} results. The + \method{__new__()} method is overridden to support checking that the + right number of arguments are passed. +\end{classdesc} + + +\begin{classdesc}{SplitResult}{scheme, netloc, path, query, fragment} + Concrete class for \function{urlsplit()} results. The + \method{__new__()} method is overridden to support checking that the + right number of arguments are passed. +\end{classdesc} diff --git a/Doc/lib/libwarnings.tex b/Doc/lib/libwarnings.tex index 8655451..7b829a0 100644 --- a/Doc/lib/libwarnings.tex +++ b/Doc/lib/libwarnings.tex @@ -169,7 +169,8 @@ the latter would defeat the purpose of the warning message). \end{funcdesc} \begin{funcdesc}{warn_explicit}{message, category, filename, - lineno\optional{, module\optional{, registry}}} + lineno\optional{, module\optional{, registry\optional{, + module_globals}}}} This is a low-level interface to the functionality of \function{warn()}, passing in explicitly the message, category, filename and line number, and optionally the module name and the @@ -179,6 +180,11 @@ stripped; if no registry is passed, the warning is never suppressed. \var{message} must be a string and \var{category} a subclass of \exception{Warning} or \var{message} may be a \exception{Warning} instance, in which case \var{category} will be ignored. + +\var{module_globals}, if supplied, should be the global namespace in use +by the code for which the warning is issued. (This argument is used to +support displaying source for modules found in zipfiles or other +non-filesystem import sources, and was added in Python 2.5.) \end{funcdesc} \begin{funcdesc}{showwarning}{message, category, filename, diff --git a/Doc/lib/libxmlrpclib.tex b/Doc/lib/libxmlrpclib.tex index 0fb88c5..1c36f99 100644 --- a/Doc/lib/libxmlrpclib.tex +++ b/Doc/lib/libxmlrpclib.tex @@ -203,7 +203,7 @@ It also supports certain of Python's built-in operators through \subsection{Binary Objects \label{binary-objects}} -This class may initialized from string data (which may include NULs). +This class may be initialized from string data (which may include NULs). The primary access to the content of a \class{Binary} object is provided by an attribute: @@ -303,10 +303,6 @@ Convert any Python value to one of the XML-RPC Boolean constants, \code{True} or \code{False}. \end{funcdesc} -\begin{funcdesc}{binary}{data} -Trivially convert any Python string to a \class{Binary} object. -\end{funcdesc} - \begin{funcdesc}{dumps}{params\optional{, methodname\optional{, methodresponse\optional{, encoding\optional{, allow_none}}}}} diff --git a/Doc/lib/libzipfile.tex b/Doc/lib/libzipfile.tex index 32ca3e0..4e06ef6 100644 --- a/Doc/lib/libzipfile.tex +++ b/Doc/lib/libzipfile.tex @@ -141,10 +141,17 @@ cat myzip.zip >> python.exe Write the file named \var{filename} to the archive, giving it the archive name \var{arcname} (by default, this will be the same as \var{filename}, but without a drive letter and with leading path - separators removed). If given, \var{compress_type} overrides the value - given for the \var{compression} parameter to the constructor for - the new entry. The archive must be open with mode \code{'w'} or - \code{'a'}. + separators removed). If given, \var{compress_type} overrides the + value given for the \var{compression} parameter to the constructor + for the new entry. The archive must be open with mode \code{'w'} + or \code{'a'}. + + \note{There is no official file name encoding for ZIP files. + If you have unicode file names, please convert them to byte strings + in your desired encoding before passing them to \method{write()}. + WinZip interprets all file names as encoded in CP437, also known + as DOS Latin.} + \note{Archive names should be relative to the archive root, that is, they should not start with a path separator.} \end{methoddesc} diff --git a/Doc/lib/libzipimport.tex b/Doc/lib/libzipimport.tex index 0a60b29..770ea21 100644 --- a/Doc/lib/libzipimport.tex +++ b/Doc/lib/libzipimport.tex @@ -69,8 +69,8 @@ The available attributes of this module are: \begin{classdesc}{zipimporter}{archivepath} Create a new zipimporter instance. \var{archivepath} must be a path to - a zipfile. \class{ZipImportError} is raised if \var{archivepath} doesn't - point to a valid ZIP archive. + a zipfile. \exception{ZipImportError} is raised if \var{archivepath} + doesn't point to a valid ZIP archive. \end{classdesc} \begin{methoddesc}{find_module}{fullname\optional{, path}} @@ -83,7 +83,7 @@ The available attributes of this module are: \begin{methoddesc}{get_code}{fullname} Return the code object for the specified module. Raise - \class{ZipImportError} if the module couldn't be found. + \exception{ZipImportError} if the module couldn't be found. \end{methoddesc} \begin{methoddesc}{get_data}{pathname} @@ -93,20 +93,20 @@ The available attributes of this module are: \begin{methoddesc}{get_source}{fullname} Return the source code for the specified module. Raise - \class{ZipImportError} if the module couldn't be found, return + \exception{ZipImportError} if the module couldn't be found, return \constant{None} if the archive does contain the module, but has no source for it. \end{methoddesc} \begin{methoddesc}{is_package}{fullname} Return True if the module specified by \var{fullname} is a package. - Raise \class{ZipImportError} if the module couldn't be found. + Raise \exception{ZipImportError} if the module couldn't be found. \end{methoddesc} \begin{methoddesc}{load_module}{fullname} Load the module specified by \var{fullname}. \var{fullname} must be the fully qualified (dotted) module name. It returns the imported - module, or raises \class{ZipImportError} if it wasn't found. + module, or raises \exception{ZipImportError} if it wasn't found. \end{methoddesc} \subsection{Examples} diff --git a/Doc/lib/libzlib.tex b/Doc/lib/libzlib.tex index 2df8b85..dfbb43d 100644 --- a/Doc/lib/libzlib.tex +++ b/Doc/lib/libzlib.tex @@ -166,11 +166,14 @@ continue. If \var{max_length} is not supplied then the whole input is decompressed, and \member{unconsumed_tail} is an empty string. \end{methoddesc} -\begin{methoddesc}[Decompress]{flush}{} +\begin{methoddesc}[Decompress]{flush}{\optional{length}} All pending input is processed, and a string containing the remaining uncompressed output is returned. After calling \method{flush()}, the \method{decompress()} method cannot be called again; the only realistic action is to delete the object. + +The optional parameter \var{length} sets the initial size of the +output buffer. \end{methoddesc} \begin{seealso} diff --git a/Doc/lib/mimelib.tex b/Doc/lib/mimelib.tex index 67de597..491d844 100644 --- a/Doc/lib/mimelib.tex +++ b/Doc/lib/mimelib.tex @@ -12,9 +12,9 @@ \authoraddress{\email{barry@python.org}} \date{\today} -\release{3.0} % software release, not documentation +\release{4.0} % software release, not documentation \setreleaseinfo{} % empty for final release -\setshortversion{3.0} % major.minor only for software +\setshortversion{4.0} % major.minor only for software \begin{document} @@ -38,11 +38,11 @@ The \module{email} package provides classes and utilities to create, parse, generate, and modify email messages, conforming to all the relevant email and MIME related RFCs. -This document describes version 3.0 of the \module{email} package, which is -distributed with Python 2.4 and is available as a standalone distutils-based -package for use with Python 2.3. \module{email} 3.0 is not compatible with -Python versions earlier than 2.3. For more information about the -\module{email} package, including download links and mailing lists, see +This document describes version 4.0 of the \module{email} package, which is +distributed with Python 2.5 and is available as a standalone distutils-based +package for use with earlier Python versions. \module{email} 4.0 is not +compatible with Python versions earlier than 2.3. For more information about +the \module{email} package, including download links and mailing lists, see \ulink{Python's email SIG}{http://www.python.org/sigs/email-sig}. The documentation that follows was written for the Python project, so @@ -51,7 +51,8 @@ package documentation, there are a few notes to be aware of: \begin{itemize} \item Deprecation and ``version added'' notes are relative to the - Python version a feature was added or deprecated. + Python version a feature was added or deprecated. See + the package history in section \ref{email-pkg-history} for details. \item If you're reading this documentation as part of the standalone \module{email} package, some of the internal links to diff --git a/Doc/lib/xmldomminidom.tex b/Doc/lib/xmldomminidom.tex index f7657eb..093915f 100644 --- a/Doc/lib/xmldomminidom.tex +++ b/Doc/lib/xmldomminidom.tex @@ -165,7 +165,7 @@ XML. With an explicit \var{encoding} argument, the result is a byte string in the specified encoding. It is recommended that this argument is -always specified. To avoid UnicodeError exceptions in case of +always specified. To avoid \exception{UnicodeError} exceptions in case of unrepresentable text data, the encoding argument should be specified as "utf-8". diff --git a/Doc/python-docs.txt b/Doc/python-docs.txt index 017fece..bf475b6 100644 --- a/Doc/python-docs.txt +++ b/Doc/python-docs.txt @@ -180,4 +180,4 @@ Answers to the Questions whether it's safe to remove, see the "Why is Python Installed on my Computer?" FAQ, found at: - http://www.python.org/doc/faq/installed.html + http://www.python.org/doc/faq/installed/ diff --git a/Doc/ref/ref3.tex b/Doc/ref/ref3.tex index 737b861..964013f 100644 --- a/Doc/ref/ref3.tex +++ b/Doc/ref/ref3.tex @@ -1035,7 +1035,7 @@ by the built-in \function{classmethod()} constructor. %========================================================================= \section{New-style and classic classes} -Classes and instances come in two flavours: old-style or classic, and new-style. +Classes and instances come in two flavors: old-style or classic, and new-style. Up to Python 2.1, old-style classes were the only flavour available to the user. The concept of (old-style) class is unrelated to the concept of type: if @@ -1065,10 +1065,14 @@ the way special methods are invoked. Others are "fixes" that could not be implemented before for compatibility concerns, like the method resolution order in case of multiple inheritance. -This manuel is not up-to-date with respect to new-style classes. For now, +This manual is not up-to-date with respect to new-style classes. For now, please see \url{http://www.python.org/doc/newstyle.html} for more information. -The plan is to eventually drop old-style classes, leaving only the semantics of new-style classes. This change will probably only be feasible in Python 3.0. +The plan is to eventually drop old-style classes, leaving only the semantics of +new-style classes. This change will probably only be feasible in Python 3.0. +\index{class}{new-style} +\index{class}{classic} +\index{class}{old-style} %========================================================================= \section{Special method names\label{specialnames}} @@ -2053,14 +2057,15 @@ exception is raised. But see the following exception: \item Exception to the previous item: if the left operand is an instance of -a built-in type or a new-style class, and the right operand is an -instance of a proper subclass of that type or class, the right -operand's \method{__rop__()} method is tried \emph{before} the left -operand's \method{__op__()} method. This is done so that a subclass can -completely override binary operators. Otherwise, the left operand's -__op__ method would always accept the right operand: when an instance -of a given class is expected, an instance of a subclass of that class -is always acceptable. +a built-in type or a new-style class, and the right operand is an instance +of a proper subclass of that type or class and overrides the base's +\method{__rop__()} method, the right operand's \method{__rop__()} method +is tried \emph{before} the left operand's \method{__op__()} method. + +This is done so that a subclass can completely override binary operators. +Otherwise, the left operand's \method{__op__()} method would always +accept the right operand: when an instance of a given class is expected, +an instance of a subclass of that class is always acceptable. \item @@ -2106,3 +2111,63 @@ implement a \method{__coerce__()} method, for use by the built-in \function{coerce()} function. \end{itemize} + +\subsection{Context Managers and Contexts\label{context-managers}} + +\versionadded{2.5} + +A \dfn{context manager} is an object that manages the entry to, and exit +from, a \dfn{context} surrounding a block of code. Context managers are +normally invoked using the \keyword{with} statement (described in +section~\ref{with}), but can also be used by directly invoking their +methods. +\stindex{with} +\index{context manager} +\index{context} + +Typical uses of context managers include saving and restoring various +kinds of global state, locking and unlocking resources, closing opened +files, etc. + +\begin{methoddesc}[context manager]{__context__}{self} +Invoked when the object is used as the context expression of a +\keyword{with} statement. The return value must implement +\method{__enter__()} and \method{__exit__()} methods. Simple context +managers that wish to directly +implement \method{__enter__()} and \method{__exit__()} should just +return \var{self}. + +Context managers written in Python can also implement this method using +a generator function decorated with the +\function{contextlib.contextmanager} decorator, as this can be simpler +than writing individual \method{__enter__()} and \method{__exit__()} +methods when the state to be managed is complex. +\end{methoddesc} + +\begin{methoddesc}[context]{__enter__}{self} +Enter the context defined by this object. The \keyword{with} statement +will bind this method's return value to the target(s) specified in the +\keyword{as} clause of the statement, if any. +\end{methoddesc} + +\begin{methoddesc}[context]{__exit__}{exc_type, exc_value, traceback} +Exit the context defined by this object. The parameters describe the +exception that caused the context to be exited. If the context was +exited without an exception, all three arguments will be +\constant{None}. + +If an exception is supplied, and the method wishes to suppress the +exception (i.e., prevent it from being propagated), it should return a +true value. Otherwise, the exception will be processed normally upon +exit from this method. + +Note that \method{__exit__} methods should not reraise the passed-in +exception; this is the caller's responsibility. +\end{methoddesc} + +\begin{seealso} + \seepep{0343}{The "with" statement} + {The specification, background, and examples for the + Python \keyword{with} statement.} +\end{seealso} + diff --git a/Doc/ref/ref6.tex b/Doc/ref/ref6.tex index d1d23ac..1eb1258 100644 --- a/Doc/ref/ref6.tex +++ b/Doc/ref/ref6.tex @@ -488,11 +488,12 @@ enough information is saved so that the next time \method{next()} is invoked, the function can proceed exactly as if the \keyword{yield} statement were just another external call. -The \keyword{yield} statement is not allowed in the \keyword{try} -clause of a \keyword{try} ...\ \keyword{finally} construct. The -difficulty is that there's no guarantee the generator will ever be -resumed, hence no guarantee that the \keyword{finally} block will ever -get executed. +As of Python version 2.5, the \keyword{yield} statement is now +allowed in the \keyword{try} clause of a \keyword{try} ...\ +\keyword{finally} construct. If the generator is not resumed before +it is finalized (by reaching a zero reference count or by being garbage +collected), the generator-iterator's \method{close()} method will be +called, allowing any pending \keyword{finally} clauses to execute. \begin{notice} In Python 2.2, the \keyword{yield} statement is only allowed @@ -510,6 +511,11 @@ from __future__ import generators \seepep{0255}{Simple Generators} {The proposal for adding generators and the \keyword{yield} statement to Python.} + + \seepep{0342}{Coroutines via Enhanced Generators} + {The proposal that, among other generator enhancements, + proposed allowing \keyword{yield} to appear inside a + \keyword{try} ... \keyword{finally} block.} \end{seealso} diff --git a/Doc/ref/ref7.tex b/Doc/ref/ref7.tex index 90627a4..a2d46a8 100644 --- a/Doc/ref/ref7.tex +++ b/Doc/ref/ref7.tex @@ -46,6 +46,7 @@ Summarizing: \productioncont{| \token{while_stmt}} \productioncont{| \token{for_stmt}} \productioncont{| \token{try_stmt}} + \productioncont{| \token{with_stmt}} \productioncont{| \token{funcdef}} \productioncont{| \token{classdef}} \production{suite} @@ -305,8 +306,75 @@ statement to generate exceptions may be found in section~\ref{raise}. \section{The \keyword{with} statement\label{with}} \stindex{with} -The \keyword{with} statement specifies +\versionadded{2.5} +The \keyword{with} statement is used to wrap the execution of a block +with methods defined by a context manager (see +section~\ref{context-managers}). This allows common +\keyword{try}...\keyword{except}...\keyword{finally} usage patterns to +be encapsulated as context managers for convenient reuse. + +\begin{productionlist} + \production{with_stmt} + {"with" \token{expression} ["as" target_list] ":" \token{suite}} +\end{productionlist} + +The execution of the \keyword{with} statement proceeds as follows: + +\begin{enumerate} + +\item The expression is evaluated, to obtain a context manager +object. + +\item The context manager's \method{__context__()} method is invoked to +obtain a context object. + +\item The context object's \method{__enter__()} method is invoked. + +\item If a target list was included in the \keyword{with} +statement, the return value from \method{__enter__()} is assigned to it. + +\note{The \keyword{with} statement guarantees that if the +\method{__enter__()} method returns without an error, then +\method{__exit__()} will always be called. Thus, if an error occurs +during the assignment to the target list, it will be treated the same as +an error occurring within the suite would be. See step 6 below.} + +\item The suite is executed. + +\item The context object's \method{__exit__()} method is invoked. If an +exception caused the suite to be exited, its type, value, and +traceback are passed as arguments to \method{__exit__()}. Otherwise, +three \constant{None} arguments are supplied. + +If the suite was exited due to an exception, and the return +value from the \method{__exit__()} method was false, the exception is +reraised. If the return value was true, the exception is suppressed, and +execution continues with the statement following the \keyword{with} +statement. + +If the suite was exited for any reason other than an exception, the +return value from \method{__exit__()} is ignored, and execution proceeds +at the normal location for the kind of exit that was taken. + +\end{enumerate} + +\begin{notice} +In Python 2.5, the \keyword{with} statement is only allowed +when the \code{with_statement} feature has been enabled. It will always +be enabled in Python 2.6. This \code{__future__} import statement can +be used to enable the feature: + +\begin{verbatim} +from __future__ import with_statement +\end{verbatim} +\end{notice} + +\begin{seealso} + \seepep{0343}{The "with" statement} + {The specification, background, and examples for the + Python \keyword{with} statement.} +\end{seealso} \section{Function definitions\label{function}} \indexii{function}{definition} diff --git a/Doc/tools/prechm.py b/Doc/tools/prechm.py index 7b2f393..57a43fd 100644 --- a/Doc/tools/prechm.py +++ b/Doc/tools/prechm.py @@ -150,6 +150,22 @@ class Book: # Library Doc list of books: # each 'book' : (Dir, Title, First page, Content page, Index page) supported_libraries = { + '2.5': + [ + Book('.', 'Main page', 'index'), + Book('.', 'Global Module Index', 'modindex'), + Book('whatsnew', "What's New", 'index', 'contents'), + Book('tut','Tutorial','tut','node2'), + Book('lib','Library Reference','lib','contents','genindex'), + Book('ref','Language Reference','ref','contents','genindex'), + Book('mac','Macintosh Reference','mac','contents','genindex'), + Book('ext','Extending and Embedding','ext','contents'), + Book('api','Python/C API','api','contents','genindex'), + Book('doc','Documenting Python','doc','contents'), + Book('inst','Installing Python Modules', 'inst', 'index'), + Book('dist','Distributing Python Modules', 'dist', 'index', 'genindex'), + ], + '2.4': [ Book('.', 'Main page', 'index'), diff --git a/Doc/tools/toc2bkm.py b/Doc/tools/toc2bkm.py index 636459a..ab669ba 100755 --- a/Doc/tools/toc2bkm.py +++ b/Doc/tools/toc2bkm.py @@ -44,6 +44,20 @@ _transition_map = { INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection") +class BadSectionNesting(Exception): + """Raised for unsupported section level transitions.""" + + def __init__(self, level, newsection, path, lineno): + self.level = level + self.newsection = newsection + self.path = path + self.lineno = lineno + + def __str__(self): + return ("illegal transition from %s to %s at %s (line %s)" + % (self.level, self.newsection, self.path, self.lineno)) + + def parse_toc(fp, bigpart=None): toc = top = [] stack = [toc] @@ -65,7 +79,10 @@ def parse_toc(fp, bigpart=None): if stype not in INCLUDED_LEVELS: # we don't want paragraphs & subparagraphs continue - direction = _transition_map[(level, stype)] + try: + direction = _transition_map[(level, stype)] + except KeyError: + raise BadSectionNesting(level, stype, fp.name, lineno) if direction == OUTER_TO_INNER: toc = toc[-1][-1] stack.insert(0, toc) diff --git a/Doc/tut/tut.tex b/Doc/tut/tut.tex index 054985b..7f9a7ee 100644 --- a/Doc/tut/tut.tex +++ b/Doc/tut/tut.tex @@ -1012,7 +1012,7 @@ individual elements of a list: \end{verbatim} Assignment to slices is also possible, and this can even change the size -of the list: +of the list or clear it entirely: \begin{verbatim} >>> # Replace some items: @@ -1027,9 +1027,14 @@ of the list: ... a[1:1] = ['bletch', 'xyzzy'] >>> a [123, 'bletch', 'xyzzy', 1234] ->>> a[:0] = a # Insert (a copy of) itself at the beginning +>>> # Insert (a copy of) itself at the beginning +>>> a[:0] = a >>> a [123, 'bletch', 'xyzzy', 1234, 123, 'bletch', 'xyzzy', 1234] +>>> # Clear the list: replace all items with an empty list +>>> a[:] = [] +>>> a +[] \end{verbatim} The built-in function \function{len()} also applies to lists: @@ -2023,9 +2028,9 @@ applied to complex expressions and nested functions: There is a way to remove an item from a list given its index instead of its value: the \keyword{del} statement. This differs from the \method{pop()}) method which returns a value. The \keyword{del} -statement can also be used to -remove slices from a list (which we did earlier by assignment of an -empty list to the slice). For example: +statement can also be used to remove slices from a list or clear the +entire list (which we did earlier by assignment of an empty list to +the slice). For example: \begin{verbatim} >>> a = [-1, 1, 66.25, 333, 333, 1234.5] @@ -2035,6 +2040,9 @@ empty list to the slice). For example: >>> del a[2:4] >>> a [1, 66.25, 1234.5] +>>> del a[:] +>>> a +[] \end{verbatim} \keyword{del} can also be used to delete entire variables: @@ -3710,19 +3718,49 @@ Traceback (most recent call last): KeyboardInterrupt \end{verbatim} -A \emph{finally clause} is executed whether or not an exception has -occurred in the try clause. When an exception has occurred, it is -re-raised after the finally clause is executed. The finally clause is -also executed ``on the way out'' when the \keyword{try} statement is -left via a \keyword{break} or \keyword{return} statement. +A \emph{finally clause} is always executed before leaving the +\keyword{try} statement, whether an exception has occurred or not. +When an exception has occurred in the \keyword{try} clause and has not +been handled by an \keyword{except} clause (or it has occurred in a +\keyword{except} or \keyword{else} clause), it is re-raised after the +\keyword{finally} clause has been executed. The \keyword{finally} clause +is also executed ``on the way out'' when any other clause of the +\keyword{try} statement is left via a \keyword{break}, \keyword{continue} +or \keyword{return} statement. A more complicated example: + +\begin{verbatim} +>>> def divide(x, y): +... try: +... result = x / y +... except ZeroDivisionError: +... print "division by zero!" +... else: +... print "result is", result +... finally: +... print "executing finally clause" +... +>>> divide(2, 1) +result is 2 +executing finally clause +>>> divide(2, 0) +division by zero! +executing finally clause +>>> divide("2", "1") +executing finally clause +Traceback (most recent call last): + File "", line 1, in ? + File "", line 3, in divide +TypeError: unsupported operand type(s) for /: 'str' and 'str' +\end{verbatim} -The code in the finally clause is useful for releasing external -resources (such as files or network connections), regardless of -whether the use of the resource was successful. +As you can see, the \keyword{finally} clause is executed in any +event. The \exception{TypeError} raised by dividing two strings +is not handled by the \keyword{except} clause and therefore +re-raised after the \keyword{finally} clauses has been executed. -A \keyword{try} statement must either have one or more except clauses -or one finally clause, but not both (because it would be unclear which -clause should be executed first). +In real world applications, the \keyword{finally} clause is useful +for releasing external resources (such as files or network connections), +regardless of whether the use of the resource was successful. \chapter{Classes \label{classes}} @@ -5340,7 +5378,7 @@ users. \item \citetitle[../ref/ref.html]{Language Reference}: A detailed explanation of Python's syntax and semantics. It's heavy reading, -but is useful as a +but is useful as a complete guide to the language itself. \end{itemize} diff --git a/Doc/whatsnew/whatsnew20.tex b/Doc/whatsnew/whatsnew20.tex index b89ac19..bf458fa 100644 --- a/Doc/whatsnew/whatsnew20.tex +++ b/Doc/whatsnew/whatsnew20.tex @@ -1214,8 +1214,8 @@ the function to be called on exit. \item{\module{gettext}:} This module provides internationalization (I18N) and localization (L10N) support for Python programs by providing an interface to the GNU gettext message catalog library. -(Integrated by Barry Warsaw, from separate contributions by Martin von -Loewis, Peter Funk, and James Henstridge.) +(Integrated by Barry Warsaw, from separate contributions by Martin +von~L\"owis, Peter Funk, and James Henstridge.) \item{\module{linuxaudiodev}:} Support for the \file{/dev/audio} device on Linux, a twin to the existing \module{sunaudiodev} module. diff --git a/Doc/whatsnew/whatsnew21.tex b/Doc/whatsnew/whatsnew21.tex index b7ea3f2..f3d0245 100644 --- a/Doc/whatsnew/whatsnew21.tex +++ b/Doc/whatsnew/whatsnew21.tex @@ -5,7 +5,7 @@ % $Id$ \title{What's New in Python 2.1} -\release{1.00} +\release{1.01} \author{A.M. Kuchling} \authoraddress{ \strong{Python Software Foundation}\\ @@ -16,14 +16,7 @@ \section{Introduction} -It's that time again... time for a new Python release, Python 2.1. -One recent goal of the Python development team has been to accelerate -the pace of new releases, with a new release coming every 6 to 9 -months. 2.1 is the first release to come out at this faster pace, with -the first alpha appearing in January, 3 months after the final version -of 2.0 was released. - -This article explains the new features in 2.1. While there aren't as +This article explains the new features in Python 2.1. While there aren't as many changes in 2.1 as there were in Python 2.0, there are still some pleasant surprises in store. 2.1 is the first release to be steered through the use of Python Enhancement Proposals, or PEPs, so most of @@ -34,6 +27,12 @@ provides an overview of the new features for Python programmers. Refer to the Python 2.1 documentation, or to the specific PEP, for more details about any new feature that particularly interests you. +One recent goal of the Python development team has been to accelerate +the pace of new releases, with a new release coming every 6 to 9 +months. 2.1 is the first release to come out at this faster pace, with +the first alpha appearing in January, 3 months after the final version +of 2.0 was released. + The final release of Python 2.1 was made on April 17, 2001. %====================================================================== diff --git a/Doc/whatsnew/whatsnew24.tex b/Doc/whatsnew/whatsnew24.tex index e9ff4d9..51baece 100644 --- a/Doc/whatsnew/whatsnew24.tex +++ b/Doc/whatsnew/whatsnew24.tex @@ -803,8 +803,8 @@ from SimpleXMLRPCServer import (SimpleXMLRPCServer, The PEP also proposes that all \keyword{import} statements be absolute imports, with a leading \samp{.} character to indicate a relative -import. This part of the PEP is not yet implemented, and will have to -wait for Python 2.5 or some other future version. +import. This part of the PEP was not implemented for Python 2.4, +but was completed for Python 2.5. \begin{seealso} \seepep{328}{Imports: Multi-Line and Absolute/Relative} @@ -1336,7 +1336,7 @@ be used to implement other rotating handlers. \item The \module{marshal} module now shares interned strings on unpacking a data structure. This may shrink the size of certain pickle strings, but the primary effect is to make \file{.pyc} files significantly smaller. -(Contributed by Martin von Loewis.) +(Contributed by Martin von~L\"owis.) \item The \module{nntplib} module's \class{NNTP} class gained \method{description()} and \method{descriptions()} methods to retrieve @@ -1688,7 +1688,7 @@ Some of the changes to Python's build process and to the C API are: \begin{itemize} \item The Windows port now builds under MSVC++ 7.1 as well as version 6. - (Contributed by Martin von Loewis.) + (Contributed by Martin von~L\"owis.) \end{itemize} diff --git a/Doc/whatsnew/whatsnew25.tex b/Doc/whatsnew/whatsnew25.tex index 5743285..db6c25a 100644 --- a/Doc/whatsnew/whatsnew25.tex +++ b/Doc/whatsnew/whatsnew25.tex @@ -2,9 +2,14 @@ \usepackage{distutils} % $Id$ +% The easy_install stuff +% Describe the pkgutil module +% Stateful codec changes +% Fix XXX comments +% Count up the patches and bugs \title{What's New in Python 2.5} -\release{0.0} +\release{0.1} \author{A.M. Kuchling} \authoraddress{\email{amk@amk.ca}} @@ -14,7 +19,10 @@ This article explains the new features in Python 2.5. No release date for Python 2.5 has been set; it will probably be released in the -autumn of 2006. +autumn of 2006. \pep{356} describes the planned release schedule. + +Comments, suggestions, and error reports are welcome; please e-mail them +to the author or open a bug in the Python bug tracker. % XXX Compare with previous release in 2 - 3 sentences here. @@ -27,6 +35,32 @@ rationale, refer to the PEP for a particular new feature. %====================================================================== +\section{PEP 243: Uploading Modules to PyPI} + +PEP 243 describes an HTTP-based protocol for submitting software +packages to a central archive. The Python package index at +\url{http://cheeseshop.python.org} now supports package uploads, and +the new \command{upload} Distutils command will upload a package to the +repository. + +Before a package can be uploaded, you must be able to build a +distribution using the \command{sdist} Distutils command. Once that +works, you can run \code{python setup.py upload} to add your package +to the PyPI archive. Optionally you can GPG-sign the package by +supplying the \longprogramopt{sign} and +\longprogramopt{identity} options. + +\begin{seealso} + +\seepep{243}{Module Repository Upload Mechanism}{PEP written by +Sean Reifschneider; implemented by Martin von~L\"owis +and Richard Jones. Note that the PEP doesn't exactly +describe what's implemented in PyPI.} + +\end{seealso} + + +%====================================================================== \section{PEP 308: Conditional Expressions} For a long time, people have been requesting a way to write @@ -43,10 +77,10 @@ else: \end{verbatim} There have been endless tedious discussions of syntax on both -python-dev and comp.lang.python, and even a vote that found the -majority of voters wanted some way to write conditional expressions, -but there was no syntax that was clearly preferred by a majority. -Candidates include C's \code{cond ? true_v : false_v}, +python-dev and comp.lang.python. A vote was even held that found the +majority of voters wanted conditional expressions in some form, +but there was no syntax that was preferred by a clear majority. +Candidates included C's \code{cond ? true_v : false_v}, \code{if cond then true_v else false_v}, and 16 other variations. GvR eventually chose a surprising syntax: @@ -55,11 +89,12 @@ GvR eventually chose a surprising syntax: x = true_value if condition else false_value \end{verbatim} -Evaluation is still lazy as in existing Boolean expression, so the -evaluation jumps around a bit. The \var{condition} expression is -evaluated first, and the \var{true_value} expression is evaluated only -if the condition was true. Similarly, the \var{false_value} -expression is only evaluated when the condition is false. +Evaluation is still lazy as in existing Boolean expressions, so the +order of evaluation jumps around a bit. The \var{condition} +expression in the middle is evaluated first, and the \var{true_value} +expression is evaluated only if the condition was true. Similarly, +the \var{false_value} expression is only evaluated when the condition +is false. This syntax may seem strange and backwards; why does the condition go in the \emph{middle} of the expression, and not in the front as in C's @@ -110,7 +145,7 @@ around your conditional expressions, you won't run into this case. \begin{seealso} \seepep{308}{Conditional Expressions}{PEP written by -Guido van Rossum and Raymond D. Hettinger; implemented by Thomas +Guido van~Rossum and Raymond D. Hettinger; implemented by Thomas Wouters.} \end{seealso} @@ -192,7 +227,16 @@ set to a URL for the package's source code. This means it's now possible to look up an entry in the package index, determine the dependencies for a package, and download the required packages. -% XXX put example here +\begin{verbatim} +VERSION = '1.0' +setup(name='PyPackage', + version=VERSION, + requires=['numarray', 'zlib (>=1.1.4)'], + obsoletes=['OldPackage'] + download_url=('http://www.example.com/pypackage/dist/pkg-%s.tar.gz' + % VERSION), + ) +\end{verbatim} \begin{seealso} @@ -206,31 +250,178 @@ implemented by Richard Jones and Fred Drake.} %====================================================================== \section{PEP 328: Absolute and Relative Imports} -% XXX write this +The simpler part of PEP 328 was implemented in Python 2.4: parentheses +could now be used to enclose the names imported from a module using +the \code{from ... import ...} statement, making it easier to import +many different names. + +The more complicated part has been implemented in Python 2.5: +importing a module can be specified to use absolute or +package-relative imports. The plan is to move toward making absolute +imports the default in future versions of Python. + +Let's say you have a package directory like this: +\begin{verbatim} +pkg/ +pkg/__init__.py +pkg/main.py +pkg/string.py +\end{verbatim} + +This defines a package named \module{pkg} containing the +\module{pkg.main} and \module{pkg.string} submodules. + +Consider the code in the \file{main.py} module. What happens if it +executes the statement \code{import string}? In Python 2.4 and +earlier, it will first look in the package's directory to perform a +relative import, finds \file{pkg/string.py}, imports the contents of +that file as the \module{pkg.string} module, and that module is bound +to the name \samp{string} in the \module{pkg.main} module's namespace. + +That's fine if \module{pkg.string} was what you wanted. But what if +you wanted Python's standard \module{string} module? There's no clean +way to ignore \module{pkg.string} and look for the standard module; +generally you had to look at the contents of \code{sys.modules}, which +is slightly unclean. +Holger Krekel's \module{py.std} package provides a tidier way to perform +imports from the standard library, \code{import py ; py.std.string.join()}, +but that package isn't available on all Python installations. + +Reading code which relies on relative imports is also less clear, +because a reader may be confused about which module, \module{string} +or \module{pkg.string}, is intended to be used. Python users soon +learned not to duplicate the names of standard library modules in the +names of their packages' submodules, but you can't protect against +having your submodule's name being used for a new module added in a +future version of Python. + +In Python 2.5, you can switch \keyword{import}'s behaviour to +absolute imports using a \code{from __future__ import absolute_import} +directive. This absolute-import behaviour will become the default in +a future version (probably Python 2.7). Once absolute imports +are the default, \code{import string} will +always find the standard library's version. +It's suggested that users should begin using absolute imports as much +as possible, so it's preferable to begin writing \code{from pkg import +string} in your code. + +Relative imports are still possible by adding a leading period +to the module name when using the \code{from ... import} form: + +\begin{verbatim} +# Import names from pkg.string +from .string import name1, name2 +# Import pkg.string +from . import string +\end{verbatim} + +This imports the \module{string} module relative to the current +package, so in \module{pkg.main} this will import \var{name1} and +\var{name2} from \module{pkg.string}. Additional leading periods +perform the relative import starting from the parent of the current +package. For example, code in the \module{A.B.C} module can do: + +\begin{verbatim} +from . import D # Imports A.B.D +from .. import E # Imports A.E +from ..F import G # Imports A.F.G +\end{verbatim} + +Leading periods cannot be used with the \code{import \var{modname}} +form of the import statement, only the \code{from ... import} form. + +\begin{seealso} + +\seepep{328}{Imports: Multi-Line and Absolute/Relative} +{PEP written by Aahz; implemented by Thomas Wouters.} + +\seeurl{http://codespeak.net/py/current/doc/index.html} +{The py library by Holger Krekel, which contains the \module{py.std} package.} + +\end{seealso} %====================================================================== \section{PEP 338: Executing Modules as Scripts} -% XXX write this +The \programopt{-m} switch added in Python 2.4 to execute a module as +a script gained a few more abilities. Instead of being implemented in +C code inside the Python interpreter, the switch now uses an +implementation in a new module, \module{runpy}. + +The \module{runpy} module implements a more sophisticated import +mechanism so that it's now possible to run modules in a package such +as \module{pychecker.checker}. The module also supports alternative +import mechanisms such as the \module{zipimport} module. This means +you can add a .zip archive's path to \code{sys.path} and then use the +\programopt{-m} switch to execute code from the archive. + + +\begin{seealso} + +\seepep{338}{Executing modules as scripts}{PEP written and +implemented by Nick Coghlan.} + +\end{seealso} %====================================================================== \section{PEP 341: Unified try/except/finally} -% XXX write this +Until Python 2.5, the \keyword{try} statement came in two +flavours. You could use a \keyword{finally} block to ensure that code +is always executed, or one or more \keyword{except} blocks to catch +specific exceptions. You couldn't combine both \keyword{except} blocks and a +\keyword{finally} block, because generating the right bytecode for the +combined version was complicated and it wasn't clear what the +semantics of the combined should be. + +GvR spent some time working with Java, which does support the +equivalent of combining \keyword{except} blocks and a +\keyword{finally} block, and this clarified what the statement should +mean. In Python 2.5, you can now write: + +\begin{verbatim} +try: + block-1 ... +except Exception1: + handler-1 ... +except Exception2: + handler-2 ... +else: + else-block +finally: + final-block +\end{verbatim} + +The code in \var{block-1} is executed. If the code raises an +exception, the handlers are tried in order: \var{handler-1}, +\var{handler-2}, ... If no exception is raised, the \var{else-block} +is executed. No matter what happened previously, the +\var{final-block} is executed once the code block is complete and any +raised exceptions handled. Even if there's an error in an exception +handler or the \var{else-block} and a new exception is raised, the +\var{final-block} is still executed. + +\begin{seealso} + +\seepep{341}{Unifying try-except and try-finally}{PEP written by Georg Brandl; +implementation by Thomas Lee.} + +\end{seealso} %====================================================================== -\section{PEP 342: New Generator Features} +\section{PEP 342: New Generator Features\label{section-generators}} +Python 2.5 adds a simple way to pass values \emph{into} a generator. As introduced in Python 2.3, generators only produce output; once a generator's code is invoked to create an iterator, there's no way to pass any new information into the function when its execution is -resumed. Hackish solutions to this include making the generator's -code look at a global variable and then changing the global variable's +resumed. Sometimes the ability to pass in some information would be +useful. Hackish solutions to this include making the generator's code +look at a global variable and then changing the global variable's value, or passing in some mutable object that callers then modify. -Python 2.5 adds the ability to pass values \emph{into} a generator. To refresh your memory of basic generators, here's a simple example: @@ -239,7 +430,7 @@ def counter (maximum): i = 0 while i < maximum: yield i - i += 1 + i += 1 \end{verbatim} When you call \code{counter(10)}, the result is an iterator that @@ -261,11 +452,14 @@ I recommend that you always put parentheses around a \keyword{yield} expression when you're doing something with the returned value, as in the above example. The parentheses aren't always necessary, but it's easier to always add them instead of having to remember when they're -needed.\footnote{The exact rules are that a \keyword{yield}-expression must -always be parenthesized except when it occurs at the top-level -expression on the right-hand side of an assignment, meaning you can -write \code{val = yield i} but have to use parentheses when there's an -operation, as in \code{val = (yield i) + 12}.} +needed. + +(\pep{342} explains the exact rules, which are that a +\keyword{yield}-expression must always be parenthesized except when it +occurs at the top-level expression on the right-hand side of an +assignment. This means you can write \code{val = yield i} but have to +use parentheses when there's an operation, as in \code{val = (yield i) ++ 12}.) Values are sent into a generator by calling its \method{send(\var{value})} method. The generator's code is then @@ -281,11 +475,11 @@ def counter (maximum): i = 0 while i < maximum: val = (yield i) - # If value provided, change counter + # If value provided, change counter if val is not None: i = val - else: - i += 1 + else: + i += 1 \end{verbatim} And here's an example of changing the counter: @@ -344,13 +538,32 @@ Generators also become \emph{coroutines}, a more generalized form of subroutines. Subroutines are entered at one point and exited at another point (the top of the function, and a \keyword{return statement}), but coroutines can be entered, exited, and resumed at -many different points (the \keyword{yield} statements). - +many different points (the \keyword{yield} statements). We'll have to +figure out patterns for using coroutines effectively in Python. + +The addition of the \method{close()} method has one side effect that +isn't obvious. \method{close()} is called when a generator is +garbage-collected, so this means the generator's code gets one last +chance to run before the generator is destroyed. This last chance +means that \code{try...finally} statements in generators can now be +guaranteed to work; the \keyword{finally} clause will now always get a +chance to run. The syntactic restriction that you couldn't mix +\keyword{yield} statements with a \code{try...finally} suite has +therefore been removed. This seems like a minor bit of language +trivia, but using generators and \code{try...finally} is actually +necessary in order to implement the \keyword{with} statement +described by PEP 343. I'll look at this new statement in the following +section. + +Another even more esoteric effect of this change: previously, the +\member{gi_frame} attribute of a generator was always a frame object. +It's now possible for \member{gi_frame} to be \code{None} +once the generator has been exhausted. \begin{seealso} \seepep{342}{Coroutines via Enhanced Generators}{PEP written by -Guido van Rossum and Phillip J. Eby; +Guido van~Rossum and Phillip J. Eby; implemented by Phillip J. Eby. Includes examples of some fancier uses of generators as coroutines.} @@ -367,14 +580,323 @@ Sugalski.} %====================================================================== \section{PEP 343: The 'with' statement} -% XXX write this +The \keyword{with} statement allows a clearer version of code that +uses \code{try...finally} blocks to ensure that clean-up code is +executed. + +In this section, I'll discuss the statement as it will commonly be +used. In the next section, I'll examine the implementation details +and show how to write objects called ``context managers'' and +``contexts'' for use with this statement. + +The \keyword{with} statement is a new control-flow structure whose +basic structure is: + +\begin{verbatim} +with expression [as variable]: + with-block +\end{verbatim} + +The expression is evaluated, and it should result in a type of object +that's called a context manager. The context manager can return a +value that can optionally be bound to the name \var{variable}. (Note +carefully: \var{variable} is \emph{not} assigned the result of +\var{expression}.) One method of the context manager is run before +\var{with-block} is executed, and another method is run after the +block is done, even if the block raised an exception. + +To enable the statement in Python 2.5, you need +to add the following directive to your module: + +\begin{verbatim} +from __future__ import with_statement +\end{verbatim} + +The statement will always be enabled in Python 2.6. + +Some standard Python objects can now behave as context managers. File +objects are one example: + +\begin{verbatim} +with open('/etc/passwd', 'r') as f: + for line in f: + print line + ... more processing code ... +\end{verbatim} + +After this statement has executed, the file object in \var{f} will +have been automatically closed at this point, even if the 'for' loop +raised an exception part-way through the block. + +The \module{threading} module's locks and condition variables +also support the \keyword{with} statement: + +\begin{verbatim} +lock = threading.Lock() +with lock: + # Critical section of code + ... +\end{verbatim} + +The lock is acquired before the block is executed, and always released once +the block is complete. + +The \module{decimal} module's contexts, which encapsulate the desired +precision and rounding characteristics for computations, can also be +used as context managers. + +\begin{verbatim} +import decimal + +# Displays with default precision of 28 digits +v1 = decimal.Decimal('578') +print v1.sqrt() + +with decimal.Context(prec=16): + # All code in this block uses a precision of 16 digits. + # The original context is restored on exiting the block. + print v1.sqrt() +\end{verbatim} + +\subsection{Writing Context Managers} + +Under the hood, the \keyword{with} statement is fairly complicated. +Most people will only use \keyword{with} in company with +existing objects that are documented to work as context managers, and +don't need to know these details, so you can skip the following section if +you like. Authors of new context managers will need to understand the +details of the underlying implementation. + +A high-level explanation of the context management protocol is: + +\begin{itemize} +\item The expression is evaluated and should result in an object +that's a context manager, meaning that it has a +\method{__context__()} method. + +\item This object's \method{__context__()} method is called, and must +return a context object. + +\item The context's \method{__enter__()} method is called. +The value returned is assigned to \var{VAR}. If no \code{as \var{VAR}} +clause is present, the value is simply discarded. + +\item The code in \var{BLOCK} is executed. + +\item If \var{BLOCK} raises an exception, the context object's +\method{__exit__(\var{type}, \var{value}, \var{traceback})} is called +with the exception's information, the same values returned by +\function{sys.exc_info()}. The method's return value +controls whether the exception is re-raised: any false value +re-raises the exception, and \code{True} will result in suppressing it. +You'll only rarely want to suppress the exception; the +author of the code containing the \keyword{with} statement will +never realize anything went wrong. + +\item If \var{BLOCK} didn't raise an exception, +the context object's \method{__exit__()} is still called, +but \var{type}, \var{value}, and \var{traceback} are all \code{None}. + +\end{itemize} + +Let's think through an example. I won't present detailed code but +will only sketch the necessary code. The example will be writing a +context manager for a database that supports transactions. + +(For people unfamiliar with database terminology: a set of changes to +the database are grouped into a transaction. Transactions can be +either committed, meaning that all the changes are written into the +database, or rolled back, meaning that the changes are all discarded +and the database is unchanged. See any database textbook for more +information.) +% XXX find a shorter reference? + +Let's assume there's an object representing a database connection. +Our goal will be to let the user write code like this: + +\begin{verbatim} +db_connection = DatabaseConnection() +with db_connection as cursor: + cursor.execute('insert into ...') + cursor.execute('delete from ...') + # ... more operations ... +\end{verbatim} + +The transaction should either be committed if the code in the block +runs flawlessly, or rolled back if there's an exception. + +First, the \class{DatabaseConnection} needs a \method{__context__()} +method. Sometimes an object can be its own context manager and can +simply return \code{self}; the \module{threading} module's lock objects +can do this. For our database example, though, we need to +create a new object; I'll call this class \class{DatabaseContext}. +Our \method{__context__()} must therefore look like this: + +\begin{verbatim} +class DatabaseConnection: + ... + def __context__ (self): + return DatabaseContext(self) + + # Database interface + def cursor (self): + "Returns a cursor object and starts a new transaction" + def commit (self): + "Commits current transaction" + def rollback (self): + "Rolls back current transaction" +\end{verbatim} + +The context needs the connection object so that the connection +object's \method{commit()} or \method{rollback()} methods can be +called: + +\begin{verbatim} +class DatabaseContext: + def __init__ (self, connection): + self.connection = connection +\end{verbatim} + +The \method {__enter__()} method is pretty easy, having only +to start a new transaction. In this example, +the resulting cursor object would be a useful result, +so the method will return it. The user can +then add \code{as cursor} to their \keyword{with} statement +to bind the cursor to a variable name. + +\begin{verbatim} +class DatabaseContext: + ... + def __enter__ (self): + # Code to start a new transaction + cursor = self.connection.cursor() + return cursor +\end{verbatim} + +The \method{__exit__()} method is the most complicated because it's +where most of the work has to be done. The method has to check if an +exception occurred. If there was no exception, the transaction is +committed. The transaction is rolled back if there was an exception. +Here the code will just fall off the end of the function, returning +the default value of \code{None}. \code{None} is false, so the exception +will be re-raised automatically. If you wished, you could be more explicit +and add a \keyword{return} at the marked location. + +\begin{verbatim} +class DatabaseContext: + ... + def __exit__ (self, type, value, tb): + if tb is None: + # No exception, so commit + self.connection.commit() + else: + # Exception occurred, so rollback. + self.connection.rollback() + # return False +\end{verbatim} + + +\subsection{The contextlib module\label{module-contextlib}} + +The new \module{contextlib} module provides some functions and a +decorator that are useful for writing context managers. + +The decorator is called \function{contextmanager}, and lets you write +a simple context manager as a generator. The generator should yield +exactly one value. The code up to the \keyword{yield} will be +executed as the \method{__enter__()} method, and the value yielded +will be the method's return value that will get bound to the variable +in the \keyword{with} statement's \keyword{as} clause, if any. The +code after the \keyword{yield} will be executed in the +\method{__exit__()} method. Any exception raised in the block +will be raised by the \keyword{yield} statement. + +Our database example from the previous section could be written +using this decorator as: + +\begin{verbatim} +from contextlib import contextmanager + +@contextmanager +def db_transaction (connection): + cursor = connection.cursor() + try: + yield cursor + except: + connection.rollback() + raise + else: + connection.commit() + +db = DatabaseConnection() +with db_transaction(db) as cursor: + ... +\end{verbatim} + +You can also use this decorator to write the \method{__context__()} method +for a class without creating a new class for the context: + +\begin{verbatim} +class DatabaseConnection: + + @contextmanager + def __context__ (self): + cursor = self.cursor() + try: + yield cursor + except: + self.rollback() + raise + else: + self.commit() +\end{verbatim} + + +There's a \function{nested(\var{mgr1}, \var{mgr2}, ...)} manager that +combines a number of context managers so you don't need to write +nested \keyword{with} statements. This example statement does two +things, starting a database transaction and acquiring a thread lock: + +\begin{verbatim} +lock = threading.Lock() +with nested (db_transaction(db), lock) as (cursor, locked): + ... +\end{verbatim} + +Finally, the \function{closing(\var{object})} context manager +returns \var{object} so that it can be bound to a variable, +and calls \code{\var{object}.close()} at the end of the block. + +\begin{verbatim} +import urllib, sys +from contextlib import closing + +with closing(urllib.urlopen('http://www.yahoo.com')) as f: + for line in f: + sys.stdout.write(line) +\end{verbatim} + +\begin{seealso} + +\seepep{343}{The ``with'' statement}{PEP written by Guido van~Rossum +and Nick Coghlan; implemented by Mike Bland, Guido van~Rossum, and +Neal Norwitz. The PEP shows the code generated for a \keyword{with} +statement, which can be helpful in learning how context managers +work.} + +\seeurl{../lib/module-contextlib.html}{The documentation +for the \module{contextlib} module.} + +\end{seealso} %====================================================================== \section{PEP 352: Exceptions as New-Style Classes} -Exception classes can now be new-style classes, not just classic classes, -and the built-in \exception{Exception} class and all +Exception classes can now be new-style classes, not just classic +classes, and the built-in \exception{Exception} class and all the +standard built-in exceptions (\exception{NameError}, +\exception{ValueError}, etc.) are now new-style classes. The inheritance hierarchy for exceptions has been rearranged a bit. In 2.5, the inheritance relationships are: @@ -428,8 +950,70 @@ in a few releases. \begin{seealso} -\seepep{352}{}{PEP written by -Brett Cannon and Guido van Rossum; implemented by Brett Cannon.} +\seepep{352}{Required Superclass for Exceptions}{PEP written by +Brett Cannon and Guido van~Rossum; implemented by Brett Cannon.} + +\end{seealso} + + +%====================================================================== +\section{PEP 353: Using ssize_t as the index type\label{section-353}} + +A wide-ranging change to Python's C API, using a new +\ctype{Py_ssize_t} type definition instead of \ctype{int}, +will permit the interpreter to handle more data on 64-bit platforms. +This change doesn't affect Python's capacity on 32-bit platforms. + +Various pieces of the Python interpreter used C's \ctype{int} type to +store sizes or counts; for example, the number of items in a list or +tuple were stored in an \ctype{int}. The C compilers for most 64-bit +platforms still define \ctype{int} as a 32-bit type, so that meant +that lists could only hold up to \code{2**31 - 1} = 2147483647 items. +(There are actually a few different programming models that 64-bit C +compilers can use -- see +\url{http://www.unix.org/version2/whatsnew/lp64_wp.html} for a +discussion -- but the most commonly available model leaves \ctype{int} +as 32 bits.) + +A limit of 2147483647 items doesn't really matter on a 32-bit platform +because you'll run out of memory before hitting the length limit. +Each list item requires space for a pointer, which is 4 bytes, plus +space for a \ctype{PyObject} representing the item. 2147483647*4 is +already more bytes than a 32-bit address space can contain. + +It's possible to address that much memory on a 64-bit platform, +however. The pointers for a list that size would only require 16GiB +of space, so it's not unreasonable that Python programmers might +construct lists that large. Therefore, the Python interpreter had to +be changed to use some type other than \ctype{int}, and this will be a +64-bit type on 64-bit platforms. The change will cause +incompatibilities on 64-bit machines, so it was deemed worth making +the transition now, while the number of 64-bit users is still +relatively small. (In 5 or 10 years, we may \emph{all} be on 64-bit +machines, and the transition would be more painful then.) + +This change most strongly affects authors of C extension modules. +Python strings and container types such as lists and tuples +now use \ctype{Py_ssize_t} to store their size. +Functions such as \cfunction{PyList_Size()} +now return \ctype{Py_ssize_t}. Code in extension modules +may therefore need to have some variables changed to +\ctype{Py_ssize_t}. + +The \cfunction{PyArg_ParseTuple()} and \cfunction{Py_BuildValue()} functions +have a new conversion code, \samp{n}, for \ctype{Py_ssize_t}. +\cfunction{PyArg_ParseTuple()}'s \samp{s\#} and \samp{t\#} still output +\ctype{int} by default, but you can define the macro +\csimplemacro{PY_SSIZE_T_CLEAN} before including \file{Python.h} +to make them return \ctype{Py_ssize_t}. + +\pep{353} has a section on conversion guidelines that +extension authors should read to learn about supporting 64-bit +platforms. + +\begin{seealso} + +\seepep{353}{Using ssize_t as the index type}{PEP written and implemented by Martin von~L\"owis.} \end{seealso} @@ -437,7 +1021,47 @@ Brett Cannon and Guido van Rossum; implemented by Brett Cannon.} %====================================================================== \section{PEP 357: The '__index__' method} -% XXX write this +The NumPy developers had a problem that could only be solved by adding +a new special method, \method{__index__}. When using slice notation, +as in \code{[\var{start}:\var{stop}:\var{step}]}, the values of the +\var{start}, \var{stop}, and \var{step} indexes must all be either +integers or long integers. NumPy defines a variety of specialized +integer types corresponding to unsigned and signed integers of 8, 16, +32, and 64 bits, but there was no way to signal that these types could +be used as slice indexes. + +Slicing can't just use the existing \method{__int__} method because +that method is also used to implement coercion to integers. If +slicing used \method{__int__}, floating-point numbers would also +become legal slice indexes and that's clearly an undesirable +behaviour. + +Instead, a new special method called \method{__index__} was added. It +takes no arguments and returns an integer giving the slice index to +use. For example: + +\begin{verbatim} +class C: + def __index__ (self): + return self.value +\end{verbatim} + +The return value must be either a Python integer or long integer. +The interpreter will check that the type returned is correct, and +raises a \exception{TypeError} if this requirement isn't met. + +A corresponding \member{nb_index} slot was added to the C-level +\ctype{PyNumberMethods} structure to let C extensions implement this +protocol. \cfunction{PyNumber_Index(\var{obj})} can be used in +extension code to call the \method{__index__} function and retrieve +its result. + +\begin{seealso} + +\seepep{357}{Allowing Any Object to be Used for Slicing}{PEP written +and implemented by Travis Oliphant.} + +\end{seealso} %====================================================================== @@ -448,10 +1072,29 @@ language. \begin{itemize} +\item The \class{dict} type has a new hook for letting subclasses +provide a default value when a key isn't contained in the dictionary. +When a key isn't found, the dictionary's +\method{__missing__(\var{key})} +method will be called. This hook is used to implement +the new \class{defaultdict} class in the \module{collections} +module. The following example defines a dictionary +that returns zero for any missing key: + +\begin{verbatim} +class zerodict (dict): + def __missing__ (self, key): + return 0 + +d = zerodict({1:1, 2:2}) +print d[1], d[2] # Prints 1, 2 +print d[3], d[4] # Prints 0, 0 +\end{verbatim} + \item The \function{min()} and \function{max()} built-in functions gained a \code{key} keyword argument analogous to the \code{key} -argument for \method{sort()}. This argument supplies a function -that takes a single argument and is called for every value in the list; +argument for \method{sort()}. This argument supplies a function that +takes a single argument and is called for every value in the list; \function{min()}/\function{max()} will return the element with the smallest/largest return value from this function. For example, to find the longest string in a list, you can do: @@ -459,7 +1102,7 @@ For example, to find the longest string in a list, you can do: \begin{verbatim} L = ['medium', 'longest', 'short'] # Prints 'longest' -print max(L, key=len) +print max(L, key=len) # Prints 'short', because lexicographically 'short' has the largest value print max(L) \end{verbatim} @@ -472,9 +1115,18 @@ false values. \function{any()} returns \constant{True} if any value returned by the iterator is true; otherwise it will return \constant{False}. \function{all()} returns \constant{True} only if all of the values returned by the iterator evaluate as being true. +(Suggested by GvR, and implemented by Raymond Hettinger.) -% XXX who added? +\item ASCII is now the default encoding for modules. It's now +a syntax error if a module contains string literals with 8-bit +characters but doesn't have an encoding declaration. In Python 2.4 +this triggered a warning, not a syntax error. See \pep{263} +for how to declare a module's encoding; for example, you might add +a line like this near the top of the source file: +\begin{verbatim} +# -*- coding: latin1 -*- +\end{verbatim} \item The list of base classes in a class definition can now be empty. As an example, this is now legal: @@ -489,6 +1141,24 @@ class C(): %====================================================================== +\subsection{Interactive Interpreter Changes} + +In the interactive interpreter, \code{quit} and \code{exit} +have long been strings so that new users get a somewhat helpful message +when they try to quit: + +\begin{verbatim} +>>> quit +'Use Ctrl-D (i.e. EOF) to exit.' +\end{verbatim} + +In Python 2.5, \code{quit} and \code{exit} are now objects that still +produce string representations of themselves, but are also callable. +Newbies who try \code{quit()} or \code{exit()} will now exit the +interpreter as they expect. (Implemented by Georg Brandl.) + + +%====================================================================== \subsection{Optimizations} \begin{itemize} @@ -500,43 +1170,135 @@ In 2.5 the internal data structure has been customized for implementing sets, and as a result sets will use a third less memory and are somewhat faster. (Implemented by Raymond Hettinger.) +\item The performance of some Unicode operations, such as +character map decoding, has been improved. +% Patch 1313939 + +\item The code generator's peephole optimizer now performs +simple constant folding in expressions. If you write something like +\code{a = 2+3}, the code generator will do the arithmetic and produce +code corresponding to \code{a = 5}. + \end{itemize} The net result of the 2.5 optimizations is that Python 2.5 runs the -pystone benchmark around XX\% faster than Python 2.4. +pystone benchmark around XXX\% faster than Python 2.4. %====================================================================== \section{New, Improved, and Deprecated Modules} -As usual, Python's standard library received a number of enhancements and -bug fixes. Here's a partial list of the most notable changes, sorted -alphabetically by module name. Consult the -\file{Misc/NEWS} file in the source tree for a more -complete list of changes, or look through the SVN logs for all the -details. +The standard library received many enhancements and bug fixes in +Python 2.5. Here's a partial list of the most notable changes, sorted +alphabetically by module name. Consult the \file{Misc/NEWS} file in +the source tree for a more complete list of changes, or look through +the SVN logs for all the details. \begin{itemize} -% ctypes added - -% collections.deque now has .remove() - % the cPickle module no longer accepts the deprecated None option in the % args tuple returned by __reduce__(). -% csv module improvements +\item The \module{audioop} module now supports the a-LAW encoding, +and the code for u-LAW encoding has been improved. (Contributed by +Lars Immisch.) -% datetime.datetime() now has a strptime class method which can be used to -% create datetime object using a string and format. +\item The \module{collections} module gained a new type, +\class{defaultdict}, that subclasses the standard \class{dict} +type. The new type mostly behaves like a dictionary but constructs a +default value when a key isn't present, automatically adding it to the +dictionary for the requested key value. -\item A new \module{hashlib} module has been added to replace the -\module{md5} and \module{sha} modules. \module{hashlib} adds support -for additional secure hashes (SHA-224, SHA-256, SHA-384, and SHA-512). -When available, the module uses OpenSSL for fast platform optimized -implementations of algorithms. The old \module{md5} and \module{sha} -modules still exist as wrappers around hashlib to preserve backwards -compatibility. (Contributed by Gregory P. Smith.) +The first argument to \class{defaultdict}'s constructor is a factory +function that gets called whenever a key is requested but not found. +This factory function receives no arguments, so you can use built-in +type constructors such as \function{list()} or \function{int()}. For +example, +you can make an index of words based on their initial letter like this: + +\begin{verbatim} +words = """Nel mezzo del cammin di nostra vita +mi ritrovai per una selva oscura +che la diritta via era smarrita""".lower().split() + +index = defaultdict(list) + +for w in words: + init_letter = w[0] + index[init_letter].append(w) +\end{verbatim} + +Printing \code{index} results in the following output: + +\begin{verbatim} +defaultdict(, {'c': ['cammin', 'che'], 'e': ['era'], + 'd': ['del', 'di', 'diritta'], 'm': ['mezzo', 'mi'], + 'l': ['la'], 'o': ['oscura'], 'n': ['nel', 'nostra'], + 'p': ['per'], 's': ['selva', 'smarrita'], + 'r': ['ritrovai'], 'u': ['una'], 'v': ['vita', 'via']} +\end{verbatim} + +The \class{deque} double-ended queue type supplied by the +\module{collections} module now has a \method{remove(\var{value})} +method that removes the first occurrence of \var{value} in the queue, +raising \exception{ValueError} if the value isn't found. + +\item New module: The \module{contextlib} module contains helper functions for use +with the new \keyword{with} statement. See +section~\ref{module-contextlib} for more about this module. +(Contributed by Phillip J. Eby.) + +\item New module: The \module{cProfile} module is a C implementation of +the existing \module{profile} module that has much lower overhead. +The module's interface is the same as \module{profile}: you run +\code{cProfile.run('main()')} to profile a function, can save profile +data to a file, etc. It's not yet known if the Hotshot profiler, +which is also written in C but doesn't match the \module{profile} +module's interface, will continue to be maintained in future versions +of Python. (Contributed by Armin Rigo.) + +\item The \module{csv} module, which parses files in +comma-separated value format, received several enhancements and a +number of bugfixes. You can now set the maximum size in bytes of a +field by calling the \method{csv.field_size_limit(\var{new_limit})} +function; omitting the \var{new_limit} argument will return the +currently-set limit. The \class{reader} class now has a +\member{line_num} attribute that counts the number of physical lines +read from the source; records can span multiple physical lines, so +\member{line_num} is not the same as the number of records read. +(Contributed by Skip Montanaro and Andrew McNamara.) + +\item The \class{datetime} class in the \module{datetime} +module now has a \method{strptime(\var{string}, \var{format})} +method for parsing date strings, contributed by Josh Spoerri. +It uses the same format characters as \function{time.strptime()} and +\function{time.strftime()}: + +\begin{verbatim} +from datetime import datetime + +ts = datetime.strptime('10:13:15 2006-03-07', + '%H:%M:%S %Y-%m-%d') +\end{verbatim} + +\item The \module{fileinput} module was made more flexible. +Unicode filenames are now supported, and a \var{mode} parameter that +defaults to \code{"r"} was added to the +\function{input()} function to allow opening files in binary or +universal-newline mode. Another new parameter, \var{openhook}, +lets you use a function other than \function{open()} +to open the input files. Once you're iterating over +the set of files, the \class{FileInput} object's new +\method{fileno()} returns the file descriptor for the currently opened file. +(Contributed by Georg Brandl.) + +\item In the \module{gc} module, the new \function{get_count()} function +returns a 3-tuple containing the current collection counts for the +three GC generations. This is accounting information for the garbage +collector; when these counts reach a specified threshold, a garbage +collection sweep will be made. The existing \function{gc.collect()} +function now takes an optional \var{generation} argument of 0, 1, or 2 +to specify which generation to collect. \item The \function{nsmallest()} and \function{nlargest()} functions in the \module{heapq} module @@ -568,6 +1330,11 @@ itertools.islice(iterable, s.start, s.stop, s.step) (Contributed by Raymond Hettinger.) +\item The \module{nis} module now supports accessing domains other +than the system default domain by supplying a \var{domain} argument to +the \function{nis.match()} and \function{nis.maps()} functions. +(Contributed by Ben Bell.) + \item The \module{operator} module's \function{itemgetter()} and \function{attrgetter()} functions now support multiple fields. A call such as \code{operator.attrgetter('a', 'b')} @@ -575,11 +1342,10 @@ will return a function that retrieves the \member{a} and \member{b} attributes. Combining this new feature with the \method{sort()} method's \code{key} parameter lets you easily sort lists using multiple fields. - -% XXX who added? +(Contributed by Raymond Hettinger.) -\item The \module{os} module underwent a number of changes. The +\item The \module{os} module underwent several changes. The \member{stat_float_times} variable now defaults to true, meaning that \function{os.stat()} will now return time values as floats. (This doesn't necessarily mean that \function{os.stat()} will return times @@ -591,11 +1357,34 @@ Constants named \member{os.SEEK_SET}, \member{os.SEEK_CUR}, and \function{os.lseek()} function. Two new constants for locking are \member{os.O_SHLOCK} and \member{os.O_EXLOCK}. +Two new functions, \function{wait3()} and \function{wait4()}, were +added. They're similar the \function{waitpid()} function which waits +for a child process to exit and returns a tuple of the process ID and +its exit status, but \function{wait3()} and \function{wait4()} return +additional information. \function{wait3()} doesn't take a process ID +as input, so it waits for any child process to exit and returns a +3-tuple of \var{process-id}, \var{exit-status}, \var{resource-usage} +as returned from the \function{resource.getrusage()} function. +\function{wait4(\var{pid})} does take a process ID. +(Contributed by Chad J. Schroeder.) + On FreeBSD, the \function{os.stat()} function now returns times with nanosecond resolution, and the returned object now has \member{st_gen} and \member{st_birthtime}. The \member{st_flags} member is also available, if the platform supports it. -% XXX patch 1180695, 1212117 +(Contributed by Antti Louko and Diego Petten\`o.) +% (Patch 1180695, 1212117) + +\item The old \module{regex} and \module{regsub} modules, which have been +deprecated ever since Python 2.0, have finally been deleted. +Other deleted modules: \module{statcache}, \module{tzparse}, +\module{whrandom}. + +\item The \file{lib-old} directory, +which includes ancient modules such as \module{dircmp} and +\module{ni}, was also deleted. \file{lib-old} wasn't on the default +\code{sys.path}, so unless your programs explicitly added the directory to +\code{sys.path}, this removal shouldn't affect your code. \item The \module{socket} module now supports \constant{AF_NETLINK} sockets on Linux, thanks to a patch from Philippe Biondi. @@ -605,9 +1394,26 @@ article about them is at \url{http://www.linuxjournal.com/article/7356}. In Python code, netlink addresses are represented as a tuple of 2 integers, \code{(\var{pid}, \var{group_mask})}. -\item New module: \module{spwd} provides functions for accessing the -shadow password database on systems that support it. -% XXX give example +Socket objects also gained accessor methods \method{getfamily()}, +\method{gettype()}, and \method{getproto()} methods to retrieve the +family, type, and protocol values for the socket. + +\item New module: the \module{spwd} module provides functions for +accessing the shadow password database on systems that support +shadow passwords. + +\item The Python developers switched from CVS to Subversion during the 2.5 +development process. Information about the exact build version is +available as the \code{sys.subversion} variable, a 3-tuple +of \code{(\var{interpreter-name}, \var{branch-name}, \var{revision-range})}. +For example, at the time of writing +my copy of 2.5 was reporting \code{('CPython', 'trunk', '45313:45315')}. + +This information is also available to C extensions via the +\cfunction{Py_GetBuildInfo()} function that returns a +string of build information like this: +\code{"trunk:45355:45356M, Apr 13 2006, 07:42:19"}. +(Contributed by Barry Warsaw.) \item The \class{TarFile} class in the \module{tarfile} module now has an \method{extractall()} method that extracts all members from the @@ -625,17 +1431,28 @@ of the Unicode character database. Version 3.2.0 is required by some specifications, so it's still available as \member{unicodedata.db_3_2_0}. -\item A new package \module{xml.etree} has been added, which contains -a subset of the ElementTree XML library. Available modules are -\module{ElementTree}, \module{ElementPath}, and -\module{ElementInclude}, from ElementTree 1.2.6. (Contributed by -Fredrik Lundh.) +\item The \module{webbrowser} module received a number of +enhancements. +It's now usable as a script with \code{python -m webbrowser}, taking a +URL as the argument; there are a number of switches +to control the behaviour (\programopt{-n} for a new browser window, +\programopt{-t} for a new tab). New module-level functions, +\function{open_new()} and \function{open_new_tab()}, were added +to support this. The module's \function{open()} function supports an +additional feature, an \var{autoraise} parameter that signals whether +to raise the open window when possible. A number of additional +browsers were added to the supported list such as Firefox, Opera, +Konqueror, and elinks. (Contributed by Oleg Broytmann and George +Brandl.) +% Patch #754022 + \item The \module{xmlrpclib} module now supports returning \class{datetime} objects for the XML-RPC date type. Supply \code{use_datetime=True} to the \function{loads()} function or the \class{Unmarshaller} class to enable this feature. -% XXX patch 1120353 + (Contributed by Skip Montanaro.) +% Patch 1120353 \end{itemize} @@ -643,13 +1460,358 @@ Fredrik Lundh.) %====================================================================== -% whole new modules get described in \subsections here +\subsection{The ctypes package} + +The \module{ctypes} package, written by Thomas Heller, has been added +to the standard library. \module{ctypes} lets you call arbitrary functions +in shared libraries or DLLs. Long-time users may remember the \module{dl} module, which +provides functions for loading shared libraries and calling functions in them. The \module{ctypes} package is much fancier. + +To load a shared library or DLL, you must create an instance of the +\class{CDLL} class and provide the name or path of the shared library +or DLL. Once that's done, you can call arbitrary functions +by accessing them as attributes of the \class{CDLL} object. + +\begin{verbatim} +import ctypes + +libc = ctypes.CDLL('libc.so.6') +result = libc.printf("Line of output\n") +\end{verbatim} + +Type constructors for the various C types are provided: \function{c_int}, +\function{c_float}, \function{c_double}, \function{c_char_p} (equivalent to \ctype{char *}), and so forth. Unlike Python's types, the C versions are all mutable; you can assign to their \member{value} attribute +to change the wrapped value. Python integers and strings will be automatically +converted to the corresponding C types, but for other types you +must call the correct type constructor. (And I mean \emph{must}; +getting it wrong will often result in the interpreter crashing +with a segmentation fault.) + +You shouldn't use \function{c_char_p} with a Python string when the C function will be modifying the memory area, because Python strings are +supposed to be immutable; breaking this rule will cause puzzling bugs. When you need a modifiable memory area, +use \function{create_string_buffer()}: + +\begin{verbatim} +s = "this is a string" +buf = ctypes.create_string_buffer(s) +libc.strfry(buf) +\end{verbatim} + +C functions are assumed to return integers, but you can set +the \member{restype} attribute of the function object to +change this: + +\begin{verbatim} +>>> libc.atof('2.71828') +-1783957616 +>>> libc.atof.restype = ctypes.c_double +>>> libc.atof('2.71828') +2.71828 +\end{verbatim} + +\module{ctypes} also provides a wrapper for Python's C API +as the \code{ctypes.pythonapi} object. This object does \emph{not} +release the global interpreter lock before calling a function, because the lock must be held when calling into the interpreter's code. +There's a \class{py_object()} type constructor that will create a +\ctype{PyObject *} pointer. A simple usage: + +\begin{verbatim} +import ctypes + +d = {} +ctypes.pythonapi.PyObject_SetItem(ctypes.py_object(d), + ctypes.py_object("abc"), ctypes.py_object(1)) +# d is now {'abc', 1}. +\end{verbatim} + +Don't forget to use \class{py_object()}; if it's omitted you end +up with a segmentation fault. + +\module{ctypes} has been around for a while, but people still write +and distribution hand-coded extension modules because you can't rely on \module{ctypes} being present. +Perhaps developers will begin to write +Python wrappers atop a library accessed through \module{ctypes} instead +of extension modules, now that \module{ctypes} is included with core Python. + +\begin{seealso} + +\seeurl{http://starship.python.net/crew/theller/ctypes/} +{The ctypes web page, with a tutorial, reference, and FAQ.} + +\end{seealso} + + +%====================================================================== +\subsection{The ElementTree package} + +A subset of Fredrik Lundh's ElementTree library for processing XML has +been added to the standard library as \module{xmlcore.etree}. The +available modules are +\module{ElementTree}, \module{ElementPath}, and +\module{ElementInclude} from ElementTree 1.2.6. +The \module{cElementTree} accelerator module is also included. + +The rest of this section will provide a brief overview of using +ElementTree. Full documentation for ElementTree is available at +\url{http://effbot.org/zone/element-index.htm}. -% XXX new distutils features: upload +ElementTree represents an XML document as a tree of element nodes. +The text content of the document is stored as the \member{.text} +and \member{.tail} attributes of +(This is one of the major differences between ElementTree and +the Document Object Model; in the DOM there are many different +types of node, including \class{TextNode}.) -% XXX should hashlib perhaps be described here instead? -% XXX should xml.etree perhaps be described here instead? +The most commonly used parsing function is \function{parse()}, that +takes either a string (assumed to contain a filename) or a file-like +object and returns an \class{ElementTree} instance: + +\begin{verbatim} +from xmlcore.etree import ElementTree as ET + +tree = ET.parse('ex-1.xml') + +feed = urllib.urlopen( + 'http://planet.python.org/rss10.xml') +tree = ET.parse(feed) +\end{verbatim} +Once you have an \class{ElementTree} instance, you +can call its \method{getroot()} method to get the root \class{Element} node. + +There's also an \function{XML()} function that takes a string literal +and returns an \class{Element} node (not an \class{ElementTree}). +This function provides a tidy way to incorporate XML fragments, +approaching the convenience of an XML literal: + +\begin{verbatim} +svg = et.XML(""" + """) +svg.set('height', '320px') +svg.append(elem1) +\end{verbatim} + +Each XML element supports some dictionary-like and some list-like +access methods. Dictionary-like operations are used to access attribute +values, and list-like operations are used to access child nodes. + +\begin{tableii}{c|l}{code}{Operation}{Result} + \lineii{elem[n]}{Returns n'th child element.} + \lineii{elem[m:n]}{Returns list of m'th through n'th child elements.} + \lineii{len(elem)}{Returns number of child elements.} + \lineii{elem.getchildren()}{Returns list of child elements.} + \lineii{elem.append(elem2)}{Adds \var{elem2} as a child.} + \lineii{elem.insert(index, elem2)}{Inserts \var{elem2} at the specified location.} + \lineii{del elem[n]}{Deletes n'th child element.} + \lineii{elem.keys()}{Returns list of attribute names.} + \lineii{elem.get(name)}{Returns value of attribute \var{name}.} + \lineii{elem.set(name, value)}{Sets new value for attribute \var{name}.} + \lineii{elem.attrib}{Retrieves the dictionary containing attributes.} + \lineii{del elem.attrib[name]}{Deletes attribute \var{name}.} +\end{tableii} + +Comments and processing instructions are also represented as +\class{Element} nodes. To check if a node is a comment or processing +instructions: + +\begin{verbatim} +if elem.tag is ET.Comment: + ... +elif elem.tag is ET.ProcessingInstruction: + ... +\end{verbatim} + +To generate XML output, you should call the +\method{ElementTree.write()} method. Like \function{parse()}, +it can take either a string or a file-like object: + +\begin{verbatim} +# Encoding is US-ASCII +tree.write('output.xml') + +# Encoding is UTF-8 +f = open('output.xml', 'w') +tree.write(f, 'utf-8') +\end{verbatim} + +(Caution: the default encoding used for output is ASCII, which isn't +very useful for general XML work, raising an exception if there are +any characters with values greater than 127. You should always +specify a different encoding such as UTF-8 that can handle any Unicode +character.) + +This section is only a partial description of the ElementTree interfaces. +Please read the package's official documentation for more details. + +\begin{seealso} + +\seeurl{http://effbot.org/zone/element-index.htm} +{Official documentation for ElementTree.} + + +\end{seealso} + + +%====================================================================== +\subsection{The hashlib package} + +A new \module{hashlib} module, written by Gregory P. Smith, +has been added to replace the +\module{md5} and \module{sha} modules. \module{hashlib} adds support +for additional secure hashes (SHA-224, SHA-256, SHA-384, and SHA-512). +When available, the module uses OpenSSL for fast platform optimized +implementations of algorithms. + +The old \module{md5} and \module{sha} modules still exist as wrappers +around hashlib to preserve backwards compatibility. The new module's +interface is very close to that of the old modules, but not identical. +The most significant difference is that the constructor functions +for creating new hashing objects are named differently. + +\begin{verbatim} +# Old versions +h = md5.md5() +h = md5.new() + +# New version +h = hashlib.md5() + +# Old versions +h = sha.sha() +h = sha.new() + +# New version +h = hashlib.sha1() + +# Hash that weren't previously available +h = hashlib.sha224() +h = hashlib.sha256() +h = hashlib.sha384() +h = hashlib.sha512() + +# Alternative form +h = hashlib.new('md5') # Provide algorithm as a string +\end{verbatim} + +Once a hash object has been created, its methods are the same as before: +\method{update(\var{string})} hashes the specified string into the +current digest state, \method{digest()} and \method{hexdigest()} +return the digest value as a binary string or a string of hex digits, +and \method{copy()} returns a new hashing object with the same digest state. + + +%====================================================================== +\subsection{The sqlite3 package} + +The pysqlite module (\url{http://www.pysqlite.org}), a wrapper for the +SQLite embedded database, has been added to the standard library under +the package name \module{sqlite3}. + +SQLite is a C library that provides a SQL-language database that +stores data in disk files without requiring a separate server process. +pysqlite was written by Gerhard H\"aring and provides a SQL interface +compliant with the DB-API 2.0 specification described by +\pep{249}. This means that it should be possible to write the first +version of your applications using SQLite for data storage. If +switching to a larger database such as PostgreSQL or Oracle is +later necessary, the switch should be relatively easy. + +If you're compiling the Python source yourself, note that the source +tree doesn't include the SQLite code, only the wrapper module. +You'll need to have the SQLite libraries and headers installed before +compiling Python, and the build process will compile the module when +the necessary headers are available. + +To use the module, you must first create a \class{Connection} object +that represents the database. Here the data will be stored in the +\file{/tmp/example} file: + +\begin{verbatim} +conn = sqlite3.connect('/tmp/example') +\end{verbatim} + +You can also supply the special name \samp{:memory:} to create +a database in RAM. + +Once you have a \class{Connection}, you can create a \class{Cursor} +object and call its \method{execute()} method to perform SQL commands: + +\begin{verbatim} +c = conn.cursor() + +# Create table +c.execute('''create table stocks +(date timestamp, trans varchar, symbol varchar, + qty decimal, price decimal)''') + +# Insert a row of data +c.execute("""insert into stocks + values ('2006-01-05','BUY','RHAT',100,35.14)""") +\end{verbatim} + +Usually your SQL operations will need to use values from Python +variables. You shouldn't assemble your query using Python's string +operations because doing so is insecure; it makes your program +vulnerable to an SQL injection attack. + +Instead, use SQLite's parameter substitution. Put \samp{?} as a +placeholder wherever you want to use a value, and then provide a tuple +of values as the second argument to the cursor's \method{execute()} +method. For example: + +\begin{verbatim} +# Never do this -- insecure! +symbol = 'IBM' +c.execute("... where symbol = '%s'" % symbol) + +# Do this instead +t = (symbol,) +c.execute('select * from stocks where symbol=?', ('IBM',)) + +# Larger example +for t in (('2006-03-28', 'BUY', 'IBM', 1000, 45.00), + ('2006-04-05', 'BUY', 'MSOFT', 1000, 72.00), + ('2006-04-06', 'SELL', 'IBM', 500, 53.00), + ): + c.execute('insert into stocks values (?,?,?,?,?)', t) +\end{verbatim} + +To retrieve data after executing a SELECT statement, you can either +treat the cursor as an iterator, call the cursor's \method{fetchone()} +method to retrieve a single matching row, +or call \method{fetchall()} to get a list of the matching rows. + +This example uses the iterator form: + +\begin{verbatim} +>>> c = conn.cursor() +>>> c.execute('select * from stocks order by price') +>>> for row in c: +... print row +... +(u'2006-01-05', u'BUY', u'RHAT', 100, 35.140000000000001) +(u'2006-03-28', u'BUY', u'IBM', 1000, 45.0) +(u'2006-04-06', u'SELL', u'IBM', 500, 53.0) +(u'2006-04-05', u'BUY', u'MSOFT', 1000, 72.0) +>>> +\end{verbatim} + +For more information about the SQL dialect supported by SQLite, see +\url{http://www.sqlite.org}. + +\begin{seealso} + +\seeurl{http://www.pysqlite.org} +{The pysqlite web page.} + +\seeurl{http://www.sqlite.org} +{The SQLite web page; the documentation describes the syntax and the +available data types for the supported SQL dialect.} + +\seepep{249}{Database API Specification 2.0}{PEP written by +Marc-Andr\'e Lemburg.} + +\end{seealso} % ====================================================================== @@ -659,11 +1821,32 @@ Changes to Python's build process and to the C API include: \begin{itemize} -\item The design of the bytecode compiler has changed a great deal, no -longer generating bytecode by traversing the parse tree. Instead +\item The largest change to the C API came from \pep{353}, +which modifies the interpreter to use a \ctype{Py_ssize_t} type +definition instead of \ctype{int}. See the earlier +section~\ref{section-353} for a discussion of this change. + +\item The design of the bytecode compiler has changed a great deal, to +no longer generate bytecode by traversing the parse tree. Instead the parse tree is converted to an abstract syntax tree (or AST), and it is the abstract syntax tree that's traversed to produce the bytecode. +It's possible for Python code to obtain AST objects by using the +\function{compile()} built-in and specifying \code{_ast.PyCF_ONLY_AST} +as the value of the +\var{flags} parameter: + +\begin{verbatim} +from _ast import PyCF_ONLY_AST +ast = compile("""a=0 +for i in range(10): + a += i +""", "", 'exec', PyCF_ONLY_AST) + +assignment = ast.body[0] +for_loop = ast.body[1] +\end{verbatim} + No documentation has been written for the AST code yet. To start learning about it, read the definition of the various AST nodes in \file{Parser/Python.asdl}. A Python script reads this file and @@ -689,6 +1872,18 @@ AST sprints at conferences such as PyCon. new set, \cfunction{PySet_Add()} and \cfunction{PySet_Discard()} to add and remove elements, and \cfunction{PySet_Contains} and \cfunction{PySet_Size} to examine the set's state. +(Contributed by Raymond Hettinger.) + +\item C code can now obtain information about the exact revision +of the Python interpreter by calling the +\cfunction{Py_GetBuildInfo()} function that returns a +string of build information like this: +\code{"trunk:45355:45356M, Apr 13 2006, 07:42:19"}. +(Contributed by Barry Warsaw.) + +\item The CPython interpreter is still written in C, but +the code can now be compiled with a {\Cpp} compiler without errors. +(Implemented by Anthony Baxter, Martin von~L\"owis, Skip Montanaro.) \item The \cfunction{PyRange_New()} function was removed. It was never documented, never used in the core code, and had dangerously lax @@ -700,7 +1895,17 @@ error checking. %====================================================================== \subsection{Port-Specific Changes} -Platform-specific changes go here. +\begin{itemize} + +\item MacOS X (10.3 and higher): dynamic loading of modules +now uses the \cfunction{dlopen()} function instead of MacOS-specific +functions. + +\item Windows: \file{.dll} is no longer supported as a filename extension for +extension modules. \file{.pyd} is now the only filename extension that will +be searched for. + +\end{itemize} %====================================================================== @@ -715,7 +1920,38 @@ Some of the more notable changes are: \begin{itemize} -\item Details go here. +\item Evan Jones's patch to obmalloc, first described in a talk +at PyCon DC 2005, was applied. Python 2.4 allocated small objects in +256K-sized arenas, but never freed arenas. With this patch, Python +will free arenas when they're empty. The net effect is that on some +platforms, when you allocate many objects, Python's memory usage may +actually drop when you delete them, and the memory may be returned to +the operating system. (Implemented by Evan Jones, and reworked by Tim +Peters.) + +Note that this change means extension modules need to be more careful +with how they allocate memory. Python's API has many different +functions for allocating memory that are grouped into families. For +example, \cfunction{PyMem_Malloc()}, \cfunction{PyMem_Realloc()}, and +\cfunction{PyMem_Free()} are one family that allocates raw memory, +while \cfunction{PyObject_Malloc()}, \cfunction{PyObject_Realloc()}, +and \cfunction{PyObject_Free()} are another family that's supposed to +be used for creating Python objects. + +Previously these different families all reduced to the platform's +\cfunction{malloc()} and \cfunction{free()} functions. This meant +it didn't matter if you got things wrong and allocated memory with the +\cfunction{PyMem} function but freed it with the \cfunction{PyObject} +function. With the obmalloc change, these families now do different +things, and mismatches will probably result in a segfault. You should +carefully test your C extension modules with Python 2.5. + +\item Coverity, a company that markets a source code analysis tool + called Prevent, provided the results of their examination of the Python + source code. The analysis found about 60 bugs that + were quickly fixed. Many of the bugs were refcounting problems, often + occurring in error-handling code. See + \url{http://scan.coverity.com} for the statistics. \end{itemize} @@ -728,24 +1964,32 @@ changes to your code: \begin{itemize} -\item Some old deprecated modules (\module{statcache}, \module{tzparse}, - \module{whrandom}) have been moved to \file{Lib/lib-old}. -You can get access to these modules again by adding the directory -to your \code{sys.path}: - -\begin{verbatim} -import os -from distutils import sysconfig - -lib_dir = sysconfig.get_python_lib(standard_lib=True) -old_dir = os.path.join(lib_dir, 'lib-old') -sys.path.append(old_dir) -\end{verbatim} - -Doing so is discouraged, however; it's better to update any code that -still uses these modules. - -% the pickle module no longer uses the deprecated bin parameter. +\item ASCII is now the default encoding for modules. It's now +a syntax error if a module contains string literals with 8-bit +characters but doesn't have an encoding declaration. In Python 2.4 +this triggered a warning, not a syntax error. + +\item The \module{pickle} module no longer uses the deprecated \var{bin} parameter. + +\item Previously, the \member{gi_frame} attribute of a generator +was always a frame object. Because of the \pep{342} changes +described in section~\ref{section-generators}, it's now possible +for \member{gi_frame} to be \code{None}. + +\item C API: Many functions now use \ctype{Py_ssize_t} +instead of \ctype{int} to allow processing more data +on 64-bit machines. Extension code may need to make +the same change to avoid warnings and to support 64-bit machines. +See the earlier +section~\ref{section-353} for a discussion of this change. + +\item C API: +The obmalloc changes mean that +you must be careful to not mix usage +of the \cfunction{PyMem_*()} and \cfunction{PyObject_*()} +families of functions. Memory allocated with +one family's \cfunction{*_Malloc()} must be +freed with the corresponding family's \cfunction{*_Free()} function. \end{itemize} @@ -755,6 +1999,7 @@ still uses these modules. The author would like to thank the following people for offering suggestions, corrections and assistance with various drafts of this -article: . +article: Phillip J. Eby, Kent Johnson, Martin von~L\"owis, Gustavo +Niemeyer, Mike Rovner, Thomas Wouters. \end{document} diff --git a/Grammar/Grammar b/Grammar/Grammar index 33c37d2..a613de6 100644 --- a/Grammar/Grammar +++ b/Grammar/Grammar @@ -7,6 +7,9 @@ # with someone who can; ask around on python-dev for help. Fred # Drake will probably be listening there. +# NOTE WELL: You should also follow all the steps listed in PEP 306, +# "How to Change Python's Grammar" + # Start symbols for the grammar: # single_input is a single interactive statement; # file_input is a module or sequence of commands read from an input file; @@ -119,11 +122,11 @@ argument: test [gen_for] | test '=' test # Really [keyword '='] test list_iter: list_for | list_if list_for: 'for' exprlist 'in' testlist_safe [list_iter] -list_if: 'if' test [list_iter] +list_if: 'if' old_test [list_iter] gen_iter: gen_for | gen_if gen_for: 'for' exprlist 'in' or_test [gen_iter] -gen_if: 'if' test [gen_iter] +gen_if: 'if' old_test [gen_iter] testlist1: test (',' test)* diff --git a/Include/Python-ast.h b/Include/Python-ast.h index b3bc063..3e21030 100644 --- a/Include/Python-ast.h +++ b/Include/Python-ast.h @@ -35,9 +35,10 @@ typedef struct _keyword *keyword_ty; typedef struct _alias *alias_ty; +enum _mod_kind {Module_kind=1, Interactive_kind=2, Expression_kind=3, + Suite_kind=4}; struct _mod { - enum { Module_kind=1, Interactive_kind=2, Expression_kind=3, - Suite_kind=4 } kind; + enum _mod_kind kind; union { struct { asdl_seq *body; @@ -58,14 +59,15 @@ struct _mod { } v; }; +enum _stmt_kind {FunctionDef_kind=1, ClassDef_kind=2, Return_kind=3, + Delete_kind=4, Assign_kind=5, AugAssign_kind=6, Print_kind=7, + For_kind=8, While_kind=9, If_kind=10, With_kind=11, + Raise_kind=12, TryExcept_kind=13, TryFinally_kind=14, + Assert_kind=15, Import_kind=16, ImportFrom_kind=17, + Exec_kind=18, Global_kind=19, Expr_kind=20, Pass_kind=21, + Break_kind=22, Continue_kind=23}; struct _stmt { - enum { FunctionDef_kind=1, ClassDef_kind=2, Return_kind=3, - Delete_kind=4, Assign_kind=5, AugAssign_kind=6, Print_kind=7, - For_kind=8, While_kind=9, If_kind=10, With_kind=11, - Raise_kind=12, TryExcept_kind=13, TryFinally_kind=14, - Assert_kind=15, Import_kind=16, ImportFrom_kind=17, - Exec_kind=18, Global_kind=19, Expr_kind=20, Pass_kind=21, - Break_kind=22, Continue_kind=23 } kind; + enum _stmt_kind kind; union { struct { identifier name; @@ -181,12 +183,14 @@ struct _stmt { int col_offset; }; +enum _expr_kind {BoolOp_kind=1, BinOp_kind=2, UnaryOp_kind=3, Lambda_kind=4, + IfExp_kind=5, Dict_kind=6, ListComp_kind=7, + GeneratorExp_kind=8, Yield_kind=9, Compare_kind=10, + Call_kind=11, Repr_kind=12, Num_kind=13, Str_kind=14, + Attribute_kind=15, Subscript_kind=16, Name_kind=17, + List_kind=18, Tuple_kind=19}; struct _expr { - enum { BoolOp_kind=1, BinOp_kind=2, UnaryOp_kind=3, Lambda_kind=4, - IfExp_kind=5, Dict_kind=6, ListComp_kind=7, GeneratorExp_kind=8, - Yield_kind=9, Compare_kind=10, Call_kind=11, Repr_kind=12, - Num_kind=13, Str_kind=14, Attribute_kind=15, Subscript_kind=16, - Name_kind=17, List_kind=18, Tuple_kind=19 } kind; + enum _expr_kind kind; union { struct { boolop_ty op; @@ -236,7 +240,7 @@ struct _expr { struct { expr_ty left; - asdl_seq *ops; + asdl_int_seq *ops; asdl_seq *comparators; } Compare; @@ -292,9 +296,9 @@ struct _expr { int col_offset; }; +enum _slice_kind {Ellipsis_kind=1, Slice_kind=2, ExtSlice_kind=3, Index_kind=4}; struct _slice { - enum { Ellipsis_kind=1, Slice_kind=2, ExtSlice_kind=3, Index_kind=4 } - kind; + enum _slice_kind kind; union { struct { expr_ty lower; @@ -323,6 +327,8 @@ struct _excepthandler { expr_ty type; expr_ty name; asdl_seq *body; + int lineno; + int col_offset; }; struct _arguments { @@ -403,7 +409,7 @@ expr_ty ListComp(expr_ty elt, asdl_seq * generators, int lineno, int expr_ty GeneratorExp(expr_ty elt, asdl_seq * generators, int lineno, int col_offset, PyArena *arena); expr_ty Yield(expr_ty value, int lineno, int col_offset, PyArena *arena); -expr_ty Compare(expr_ty left, asdl_seq * ops, asdl_seq * comparators, int +expr_ty Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators, int lineno, int col_offset, PyArena *arena); expr_ty Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, expr_ty starargs, expr_ty kwargs, int lineno, int col_offset, PyArena @@ -427,8 +433,8 @@ slice_ty ExtSlice(asdl_seq * dims, PyArena *arena); slice_ty Index(expr_ty value, PyArena *arena); comprehension_ty comprehension(expr_ty target, expr_ty iter, asdl_seq * ifs, PyArena *arena); -excepthandler_ty excepthandler(expr_ty type, expr_ty name, asdl_seq * body, - PyArena *arena); +excepthandler_ty excepthandler(expr_ty type, expr_ty name, asdl_seq * body, int + lineno, int col_offset, PyArena *arena); arguments_ty arguments(asdl_seq * args, identifier vararg, identifier kwarg, asdl_seq * defaults, PyArena *arena); keyword_ty keyword(identifier arg, expr_ty value, PyArena *arena); diff --git a/Include/abstract.h b/Include/abstract.h index b76f257..d4bd588 100644 --- a/Include/abstract.h +++ b/Include/abstract.h @@ -4,6 +4,11 @@ extern "C" { #endif +#ifdef PY_SSIZE_T_CLEAN +#define PyObject_CallFunction _PyObject_CallFunction_SizeT +#define PyObject_CallMethod _PyObject_CallMethod_SizeT +#endif + /* Abstract Object Interface (many thanks to Jim Fulton) */ /* @@ -337,6 +342,11 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/ Python expression: o.method(args). */ + PyAPI_FUNC(PyObject *) _PyObject_CallFunction_SizeT(PyObject *callable, + char *format, ...); + PyAPI_FUNC(PyObject *) _PyObject_CallMethod_SizeT(PyObject *o, + char *name, + char *format, ...); PyAPI_FUNC(PyObject *) PyObject_CallFunctionObjArgs(PyObject *callable, ...); diff --git a/Include/asdl.h b/Include/asdl.h index c1c5603..84e837e 100644 --- a/Include/asdl.h +++ b/Include/asdl.h @@ -5,7 +5,9 @@ typedef PyObject * identifier; typedef PyObject * string; typedef PyObject * object; +#ifndef __cplusplus typedef enum {false, true} bool; +#endif /* It would be nice if the code generated by asdl_c.py was completely independent of Python, but it is a goal the requires too much work @@ -20,7 +22,13 @@ typedef struct { void *elements[1]; } asdl_seq; +typedef struct { + int size; + int elements[1]; +} asdl_int_seq; + asdl_seq *asdl_seq_new(int size, PyArena *arena); +asdl_int_seq *asdl_int_seq_new(int size, PyArena *arena); #define asdl_seq_GET(S, I) (S)->elements[(I)] #define asdl_seq_LEN(S) ((S) == NULL ? 0 : (S)->size) diff --git a/Include/code.h b/Include/code.h index a4509e3..23d9e17 100644 --- a/Include/code.h +++ b/Include/code.h @@ -44,7 +44,7 @@ typedef struct { /* These are no longer used. */ #define CO_GENERATOR_ALLOWED 0x1000 #define CO_FUTURE_DIVISION 0x2000 -#define CO_FUTURE_ABSIMPORT 0x4000 /* absolute import by default */ +#define CO_FUTURE_ABSOLUTE_IMPORT 0x4000 /* do absolute imports by default */ #define CO_FUTURE_WITH_STATEMENT 0x8000 #endif @@ -72,6 +72,21 @@ PyAPI_FUNC(int) PyCode_Addr2Line(PyCodeObject *, int); ((*(co)->co_code->ob_type->tp_as_buffer->bf_getreadbuffer) \ ((co)->co_code, 0, (void **)(pp))) +typedef struct _addr_pair { + int ap_lower; + int ap_upper; +} PyAddrPair; + +/* Check whether lasti (an instruction offset) falls outside bounds + and whether it is a line number that should be traced. Returns + a line number if it should be traced or -1 if the line should not. + + If lasti is not within bounds, updates bounds. +*/ + +PyAPI_FUNC(int) PyCode_CheckLineNumber(PyCodeObject* co, + int lasti, PyAddrPair *bounds); + #ifdef __cplusplus } #endif diff --git a/Include/compile.h b/Include/compile.h index 4ac6982..2bde6fb 100644 --- a/Include/compile.h +++ b/Include/compile.h @@ -22,7 +22,7 @@ typedef struct { #define FUTURE_NESTED_SCOPES "nested_scopes" #define FUTURE_GENERATORS "generators" #define FUTURE_DIVISION "division" -#define FUTURE_ABSIMPORT "absolute_import" +#define FUTURE_ABSOLUTE_IMPORT "absolute_import" #define FUTURE_WITH_STATEMENT "with_statement" struct _mod; /* Declare the existence of this type */ diff --git a/Include/genobject.h b/Include/genobject.h index f4226ed..ca84432 100644 --- a/Include/genobject.h +++ b/Include/genobject.h @@ -13,6 +13,7 @@ typedef struct { PyObject_HEAD /* The gi_ prefix is intended to remind of generator-iterator. */ + /* Note: gi_frame can be NULL if the generator is "finished" */ struct _frame *gi_frame; /* True if generator is being executed. */ @@ -28,6 +29,7 @@ PyAPI_DATA(PyTypeObject) PyGen_Type; #define PyGen_CheckExact(op) ((op)->ob_type == &PyGen_Type) PyAPI_FUNC(PyObject *) PyGen_New(struct _frame *); +PyAPI_FUNC(int) PyGen_NeedsFinalizing(PyGenObject *); #ifdef __cplusplus } diff --git a/Include/modsupport.h b/Include/modsupport.h index c356f03..23d5d3a 100644 --- a/Include/modsupport.h +++ b/Include/modsupport.h @@ -17,8 +17,10 @@ extern "C" { #define PyArg_ParseTupleAndKeywords _PyArg_ParseTupleAndKeywords_SizeT #define PyArg_VaParse _PyArg_VaParse_SizeT #define PyArg_VaParseTupleAndKeywords _PyArg_VaParseTupleAndKeywords_SizeT -#define PyArg_BuildValue _PyArg_BuildValue_SizeT -#define PyArg_VaBuildValue _PyArg_VaBuildValue_SizeT +#define Py_BuildValue _Py_BuildValue_SizeT +#define Py_VaBuildValue _Py_VaBuildValue_SizeT +#else +PyAPI_FUNC(PyObject *) _Py_VaBuildValue_SizeT(const char *, va_list); #endif PyAPI_FUNC(int) PyArg_Parse(PyObject *, const char *, ...); @@ -27,6 +29,7 @@ PyAPI_FUNC(int) PyArg_ParseTupleAndKeywords(PyObject *, PyObject *, const char *, char **, ...); PyAPI_FUNC(int) PyArg_UnpackTuple(PyObject *, const char *, Py_ssize_t, Py_ssize_t, ...); PyAPI_FUNC(PyObject *) Py_BuildValue(const char *, ...); +PyAPI_FUNC(PyObject *) _Py_BuildValue_SizeT(const char *, ...); PyAPI_FUNC(int) _PyArg_NoKeywords(const char *funcname, PyObject *kw); PyAPI_FUNC(int) PyArg_VaParse(PyObject *, const char *, va_list); diff --git a/Include/object.h b/Include/object.h index 9198007..cdbddfe 100644 --- a/Include/object.h +++ b/Include/object.h @@ -327,6 +327,7 @@ typedef struct _typeobject { Py_ssize_t tp_allocs; Py_ssize_t tp_frees; Py_ssize_t tp_maxalloc; + struct _typeobject *tp_prev; struct _typeobject *tp_next; #endif } PyTypeObject; @@ -566,6 +567,9 @@ environment the global variable trick is not safe.) PyAPI_DATA(Py_ssize_t) _Py_RefTotal; PyAPI_FUNC(void) _Py_NegativeRefcount(const char *fname, int lineno, PyObject *op); +PyAPI_FUNC(PyObject *) _PyDict_Dummy(void); +PyAPI_FUNC(PyObject *) _PySet_Dummy(void); +PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void); #define _Py_INC_REFTOTAL _Py_RefTotal++ #define _Py_DEC_REFTOTAL _Py_RefTotal-- #define _Py_REF_DEBUG_COMMA , @@ -583,8 +587,9 @@ PyAPI_FUNC(void) _Py_NegativeRefcount(const char *fname, #ifdef COUNT_ALLOCS PyAPI_FUNC(void) inc_count(PyTypeObject *); +PyAPI_FUNC(void) dec_count(PyTypeObject *); #define _Py_INC_TPALLOCS(OP) inc_count((OP)->ob_type) -#define _Py_INC_TPFREES(OP) (OP)->ob_type->tp_frees++ +#define _Py_INC_TPFREES(OP) dec_count((OP)->ob_type) #define _Py_DEC_TPFREES(OP) (OP)->ob_type->tp_frees-- #define _Py_COUNT_ALLOCS_COMMA , #else @@ -630,6 +635,40 @@ PyAPI_FUNC(void) _Py_AddToAllObjects(PyObject *, int force); else \ _Py_Dealloc((PyObject *)(op)) +/* Safely decref `op` and set `op` to NULL, especially useful in tp_clear + * and tp_dealloc implementatons. + * + * Note that "the obvious" code can be deadly: + * + * Py_XDECREF(op); + * op = NULL; + * + * Typically, `op` is something like self->containee, and `self` is done + * using its `containee` member. In the code sequence above, suppose + * `containee` is non-NULL with a refcount of 1. Its refcount falls to + * 0 on the first line, which can trigger an arbitrary amount of code, + * possibly including finalizers (like __del__ methods or weakref callbacks) + * coded in Python, which in turn can release the GIL and allow other threads + * to run, etc. Such code may even invoke methods of `self` again, or cause + * cyclic gc to trigger, but-- oops! --self->containee still points to the + * object being torn down, and it may be in an insane state while being torn + * down. This has in fact been a rich historic source of miserable (rare & + * hard-to-diagnose) segfaulting (and other) bugs. + * + * The safe way is: + * + * Py_CLEAR(op); + * + * That arranges to set `op` to NULL _before_ decref'ing, so that any code + * triggered as a side-effect of `op` getting torn down no longer believes + * `op` points to a valid object. + * + * There are cases where it's safe to use the naive code, but they're brittle. + * For example, if `op` points to a Python integer, you know that destroying + * one of those can't cause problems -- but in part that relies on that + * Python integers aren't currently weakly referencable. Best practice is + * to use Py_CLEAR() even if you can't think of a reason for why you need to. + */ #define Py_CLEAR(op) \ do { \ if (op) { \ diff --git a/Include/objimpl.h b/Include/objimpl.h index 7c68194..03b6a8d 100644 --- a/Include/objimpl.h +++ b/Include/objimpl.h @@ -101,7 +101,7 @@ PyAPI_FUNC(void) PyObject_Free(void *); /* Macros */ #ifdef WITH_PYMALLOC -#ifdef PYMALLOC_DEBUG +#ifdef PYMALLOC_DEBUG /* WITH_PYMALLOC && PYMALLOC_DEBUG */ PyAPI_FUNC(void *) _PyObject_DebugMalloc(size_t nbytes); PyAPI_FUNC(void *) _PyObject_DebugRealloc(void *p, size_t nbytes); PyAPI_FUNC(void) _PyObject_DebugFree(void *p); @@ -124,11 +124,7 @@ PyAPI_FUNC(void) _PyObject_DebugMallocStats(void); #else /* ! WITH_PYMALLOC */ #define PyObject_MALLOC PyMem_MALLOC #define PyObject_REALLOC PyMem_REALLOC -/* This is an odd one! For backward compatibility with old extensions, the - PyMem "release memory" functions have to invoke the object allocator's - free() function. When pymalloc isn't enabled, that leaves us using - the platform free(). */ -#define PyObject_FREE free +#define PyObject_FREE PyMem_FREE #endif /* WITH_PYMALLOC */ @@ -307,13 +303,13 @@ PyAPI_FUNC(void) PyObject_GC_Del(void *); * "visit" and "arg". This is intended to keep tp_traverse functions * looking as much alike as possible. */ -#define Py_VISIT(op) \ - do { \ - if (op) { \ - int vret = visit((op), arg); \ - if (vret) \ - return vret; \ - } \ +#define Py_VISIT(op) \ + do { \ + if (op) { \ + int vret = visit((PyObject *)(op), arg); \ + if (vret) \ + return vret; \ + } \ } while (0) /* This is here for the sake of backwards compatibility. Extensions that diff --git a/Include/pymem.h b/Include/pymem.h index f8aef29..671f967 100644 --- a/Include/pymem.h +++ b/Include/pymem.h @@ -59,6 +59,7 @@ PyAPI_FUNC(void) PyMem_Free(void *); /* Redirect all memory operations to Python's debugging allocator. */ #define PyMem_MALLOC PyObject_MALLOC #define PyMem_REALLOC PyObject_REALLOC +#define PyMem_FREE PyObject_FREE #else /* ! PYMALLOC_DEBUG */ @@ -68,14 +69,10 @@ PyAPI_FUNC(void) PyMem_Free(void *); pymalloc. To solve these problems, allocate an extra byte. */ #define PyMem_MALLOC(n) malloc((n) ? (n) : 1) #define PyMem_REALLOC(p, n) realloc((p), (n) ? (n) : 1) +#define PyMem_FREE free #endif /* PYMALLOC_DEBUG */ -/* In order to avoid breaking old code mixing PyObject_{New, NEW} with - PyMem_{Del, DEL} and PyMem_{Free, FREE}, the PyMem "release memory" - functions have to be redirected to the object deallocator. */ -#define PyMem_FREE PyObject_FREE - /* * Type-oriented memory interface * ============================== @@ -95,11 +92,11 @@ PyAPI_FUNC(void) PyMem_Free(void *); #define PyMem_RESIZE(p, type, n) \ ( (p) = (type *) PyMem_REALLOC((p), (n) * sizeof(type)) ) -/* In order to avoid breaking old code mixing PyObject_{New, NEW} with - PyMem_{Del, DEL} and PyMem_{Free, FREE}, the PyMem "release memory" - functions have to be redirected to the object deallocator. */ -#define PyMem_Del PyObject_Free -#define PyMem_DEL PyObject_FREE +/* PyMem{Del,DEL} are left over from ancient days, and shouldn't be used + * anymore. They're just confusing aliases for PyMem_{Free,FREE} now. + */ +#define PyMem_Del PyMem_Free +#define PyMem_DEL PyMem_FREE #ifdef __cplusplus } diff --git a/Include/pyport.h b/Include/pyport.h index 9111d86..2bce415 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -85,6 +85,10 @@ typedef PY_LONG_LONG Py_intptr_t; # error "Python needs a typedef for Py_uintptr_t in pyport.h." #endif /* HAVE_UINTPTR_T */ +/* Py_ssize_t is a signed integral type such that sizeof(Py_ssize_t) == + * sizeof(size_t). C99 doesn't define such a thing directly (size_t is an + * unsigned integral type). See PEP 353 for details. + */ #ifdef HAVE_SSIZE_T typedef ssize_t Py_ssize_t; #elif SIZEOF_VOID_P == SIZEOF_SIZE_T @@ -92,7 +96,46 @@ typedef Py_intptr_t Py_ssize_t; #else # error "Python needs a typedef for Py_ssize_t in pyport.h." #endif + +/* Largest positive value of type Py_ssize_t. */ #define PY_SSIZE_T_MAX ((Py_ssize_t)(((size_t)-1)>>1)) +/* Smallest negative value of type Py_ssize_t. */ +#define PY_SSIZE_T_MIN (-PY_SSIZE_T_MAX-1) + +/* PY_FORMAT_SIZE_T is a platform-specific modifier for use in a printf + * format to convert an argument with the width of a size_t or Py_ssize_t. + * C99 introduced "z" for this purpose, but not all platforms support that; + * e.g., MS compilers use "I" instead. + * + * These "high level" Python format functions interpret "z" correctly on + * all platforms (Python interprets the format string itself, and does whatever + * the platform C requires to convert a size_t/Py_ssize_t argument): + * + * PyString_FromFormat + * PyErr_Format + * PyString_FromFormatV + * + * Lower-level uses require that you interpolate the correct format modifier + * yourself (e.g., calling printf, fprintf, sprintf, PyOS_snprintf); for + * example, + * + * Py_ssize_t index; + * fprintf(stderr, "index %" PY_FORMAT_SIZE_T "d sucks\n", index); + * + * That will expand to %ld, or %Id, or to something else correct for a + * Py_ssize_t on the platform. + */ +#ifndef PY_FORMAT_SIZE_T +# if SIZEOF_SIZE_T == SIZEOF_INT +# define PY_FORMAT_SIZE_T "" +# elif SIZEOF_SIZE_T == SIZEOF_LONG +# define PY_FORMAT_SIZE_T "l" +# elif defined(MS_WINDOWS) +# define PY_FORMAT_SIZE_T "I" +# else +# error "This platform's pyconfig.h needs to define PY_FORMAT_SIZE_T" +# endif +#endif #include @@ -367,7 +410,8 @@ extern "C" { * typedef int T1 Py_DEPRECATED(2.4); * extern int x() Py_DEPRECATED(2.5); */ -#if defined(__GNUC__) && (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) +#if defined(__GNUC__) && ((__GNUC__ >= 4) || \ + (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)) #define Py_DEPRECATED(VERSION_UNUSED) __attribute__((__deprecated__)) #else #define Py_DEPRECATED(VERSION_UNUSED) @@ -436,14 +480,12 @@ extern double hypot(double, double); #endif -/******************************************************************* -On 4.4BSD-descendants, ctype functions serves the whole range of -wchar_t character set rather than single byte code points only. -This characteristic can break some operations of string object -including str.upper() and str.split() on UTF-8 locales. This -workaround was provided by Tim Robbins of FreeBSD project. He said -the incompatibility will be fixed in FreeBSD 6. -********************************************************************/ +/* On 4.4BSD-descendants, ctype functions serves the whole range of + * wchar_t character set rather than single byte code points only. + * This characteristic can break some operations of string object + * including str.upper() and str.split() on UTF-8 locales. This + * workaround was provided by Tim Robbins of FreeBSD project. + */ #ifdef __FreeBSD__ #include diff --git a/Include/setobject.h b/Include/setobject.h index cea95cc..cc93968 100644 --- a/Include/setobject.h +++ b/Include/setobject.h @@ -78,10 +78,13 @@ PyAPI_FUNC(PyObject *) PySet_New(PyObject *); PyAPI_FUNC(PyObject *) PyFrozenSet_New(PyObject *); PyAPI_FUNC(Py_ssize_t) PySet_Size(PyObject *anyset); #define PySet_GET_SIZE(so) (((PySetObject *)(so))->used) +PyAPI_FUNC(int) PySet_Clear(PyObject *set); PyAPI_FUNC(int) PySet_Contains(PyObject *anyset, PyObject *key); PyAPI_FUNC(int) PySet_Discard(PyObject *set, PyObject *key); PyAPI_FUNC(int) PySet_Add(PyObject *set, PyObject *key); +PyAPI_FUNC(int) _PySet_Next(PyObject *set, Py_ssize_t *pos, PyObject **entry); PyAPI_FUNC(PyObject *) PySet_Pop(PyObject *set); +PyAPI_FUNC(int) _PySet_Update(PyObject *set, PyObject *iterable); #ifdef __cplusplus } diff --git a/Include/sliceobject.h b/Include/sliceobject.h index 17f36dc..dbc34b2 100644 --- a/Include/sliceobject.h +++ b/Include/sliceobject.h @@ -30,6 +30,7 @@ PyAPI_DATA(PyTypeObject) PySlice_Type; PyAPI_FUNC(PyObject *) PySlice_New(PyObject* start, PyObject* stop, PyObject* step); +PyAPI_FUNC(PyObject *) _PySlice_FromIndices(Py_ssize_t start, Py_ssize_t stop); PyAPI_FUNC(int) PySlice_GetIndices(PySliceObject *r, Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step); PyAPI_FUNC(int) PySlice_GetIndicesEx(PySliceObject *r, Py_ssize_t length, diff --git a/LICENSE b/LICENSE index f0fc62a..5affefc 100644 --- a/LICENSE +++ b/LICENSE @@ -51,6 +51,8 @@ the various releases. 2.4 2.3 2004 PSF yes 2.4.1 2.4 2005 PSF yes 2.4.2 2.4.1 2005 PSF yes + 2.4.3 2.4.2 2006 PSF yes + 2.5 2.4 2006 PSF yes Footnotes: diff --git a/Lib/Queue.py b/Lib/Queue.py index c6c608b..51ad354 100644 --- a/Lib/Queue.py +++ b/Lib/Queue.py @@ -35,6 +35,51 @@ class Queue: # Notify not_full whenever an item is removed from the queue; # a thread waiting to put is notified then. self.not_full = threading.Condition(self.mutex) + # Notify all_tasks_done whenever the number of unfinished tasks + # drops to zero; thread waiting to join() is notified to resume + self.all_tasks_done = threading.Condition(self.mutex) + self.unfinished_tasks = 0 + + def task_done(self): + """Indicate that a formerly enqueued task is complete. + + Used by Queue consumer threads. For each get() used to fetch a task, + a subsequent call to task_done() tells the queue that the processing + on the task is complete. + + If a join() is currently blocking, it will resume when all items + have been processed (meaning that a task_done() call was received + for every item that had been put() into the queue). + + Raises a ValueError if called more times than there were items + placed in the queue. + """ + self.all_tasks_done.acquire() + try: + unfinished = self.unfinished_tasks - 1 + if unfinished <= 0: + if unfinished < 0: + raise ValueError('task_done() called too many times') + self.all_tasks_done.notifyAll() + self.unfinished_tasks = unfinished + finally: + self.all_tasks_done.release() + + def join(self): + """Blocks until all items in the Queue have been gotten and processed. + + The count of unfinished tasks goes up whenever an item is added to the + queue. The count goes down whenever a consumer thread calls task_done() + to indicate the item was retrieved and all work on it is complete. + + When the count of unfinished tasks drops to zero, join() unblocks. + """ + self.all_tasks_done.acquire() + try: + while self.unfinished_tasks: + self.all_tasks_done.wait() + finally: + self.all_tasks_done.release() def qsize(self): """Return the approximate size of the queue (not reliable!).""" @@ -86,6 +131,7 @@ class Queue: raise Full self.not_full.wait(remaining) self._put(item) + self.unfinished_tasks += 1 self.not_empty.notify() finally: self.not_full.release() diff --git a/Lib/SimpleXMLRPCServer.py b/Lib/SimpleXMLRPCServer.py index 156c2ba..a0b44e1 100644 --- a/Lib/SimpleXMLRPCServer.py +++ b/Lib/SimpleXMLRPCServer.py @@ -104,7 +104,11 @@ from xmlrpclib import Fault import SocketServer import BaseHTTPServer import sys -import os, fcntl +import os +try: + import fcntl +except ImportError: + fcntl = None def resolve_dotted_attribute(obj, attr, allow_dotted_names=True): """resolve_dotted_attribute(a, 'b.c.d') => a.b.c.d @@ -493,7 +497,7 @@ class SimpleXMLRPCServer(SocketServer.TCPServer, # [Bug #1222790] If possible, set close-on-exec flag; if a # method spawns a subprocess, the subprocess shouldn't have # the listening socket open. - if hasattr(fcntl, 'FD_CLOEXEC'): + if fcntl is not None and hasattr(fcntl, 'FD_CLOEXEC'): flags = fcntl.fcntl(self.fileno(), fcntl.F_GETFD) flags |= fcntl.FD_CLOEXEC fcntl.fcntl(self.fileno(), fcntl.F_SETFD, flags) diff --git a/Lib/__future__.py b/Lib/__future__.py index d95ce5f..d8e14d1 100644 --- a/Lib/__future__.py +++ b/Lib/__future__.py @@ -64,7 +64,7 @@ __all__ = ["all_feature_names"] + all_feature_names CO_NESTED = 0x0010 # nested_scopes CO_GENERATOR_ALLOWED = 0 # generators (obsolete, was 0x1000) CO_FUTURE_DIVISION = 0x2000 # division -CO_FUTURE_ABSIMPORT = 0x4000 # absolute_import +CO_FUTURE_ABSOLUTE_IMPORT = 0x4000 # perform absolute imports by default CO_FUTURE_WITH_STATEMENT = 0x8000 # with statement class _Feature: @@ -109,7 +109,7 @@ division = _Feature((2, 2, 0, "alpha", 2), absolute_import = _Feature((2, 5, 0, "alpha", 1), (2, 7, 0, "alpha", 0), - CO_FUTURE_ABSIMPORT) + CO_FUTURE_ABSOLUTE_IMPORT) with_statement = _Feature((2, 5, 0, "alpha", 1), (2, 6, 0, "alpha", 0), diff --git a/Lib/_threading_local.py b/Lib/_threading_local.py index 90717a8..f0ce857 100644 --- a/Lib/_threading_local.py +++ b/Lib/_threading_local.py @@ -1,9 +1,9 @@ -"""Thread-local objects +"""Thread-local objects. -(Note that this module provides a Python version of thread - threading.local class. Depending on the version of Python you're - using, there may be a faster one available. You should always import - the local class from threading.) +(Note that this module provides a Python version of the threading.local + class. Depending on the version of Python you're using, there may be a + faster one available. You should always import the `local` class from + `threading`.) Thread-local objects support the management of thread-local data. If you have data that you want to be local to a thread, simply create @@ -133,7 +133,17 @@ affects what we see: >>> del mydata """ -# Threading import is at end +__all__ = ["local"] + +# We need to use objects from the threading module, but the threading +# module may also want to use our `local` class, if support for locals +# isn't compiled in to the `thread` module. This creates potential problems +# with circular imports. For that reason, we don't import `threading` +# until the bottom of this file (a hack sufficient to worm around the +# potential problems). Note that almost all platforms do have support for +# locals in the `thread` module, and there is no circular import problem +# then, so problems introduced by fiddling the order of imports here won't +# manifest on most boxes. class _localbase(object): __slots__ = '_local__key', '_local__args', '_local__lock' @@ -202,36 +212,30 @@ class local(_localbase): finally: lock.release() + def __del__(self): + import threading - def __del__(): - threading_enumerate = enumerate - __getattribute__ = object.__getattribute__ - - def __del__(self): - key = __getattribute__(self, '_local__key') + key = object.__getattribute__(self, '_local__key') + try: + threads = list(threading.enumerate()) + except: + # If enumerate fails, as it seems to do during + # shutdown, we'll skip cleanup under the assumption + # that there is nothing to clean up. + return + + for thread in threads: try: - threads = list(threading_enumerate()) - except: - # if enumerate fails, as it seems to do during - # shutdown, we'll skip cleanup under the assumption - # that there is nothing to clean up - return - - for thread in threads: - try: - __dict__ = thread.__dict__ - except AttributeError: - # Thread is dying, rest in peace - continue - - if key in __dict__: - try: - del __dict__[key] - except KeyError: - pass # didn't have anything in this thread + __dict__ = thread.__dict__ + except AttributeError: + # Thread is dying, rest in peace. + continue - return __del__ - __del__ = __del__() + if key in __dict__: + try: + del __dict__[key] + except KeyError: + pass # didn't have anything in this thread -from threading import currentThread, enumerate, RLock +from threading import currentThread, RLock diff --git a/Lib/bdb.py b/Lib/bdb.py index 8f808cc..08b48c3 100644 --- a/Lib/bdb.py +++ b/Lib/bdb.py @@ -479,10 +479,10 @@ class Breakpoint: else: disp = 'keep ' if self.enabled: - disp = disp + 'yes' + disp = disp + 'yes ' else: - disp = disp + 'no ' - print '%-4dbreakpoint %s at %s:%d' % (self.number, disp, + disp = disp + 'no ' + print '%-4dbreakpoint %s at %s:%d' % (self.number, disp, self.file, self.line) if self.cond: print '\tstop only if %s' % (self.cond,) diff --git a/Lib/bsddb/__init__.py b/Lib/bsddb/__init__.py index d3ee773..c004c08 100644 --- a/Lib/bsddb/__init__.py +++ b/Lib/bsddb/__init__.py @@ -287,10 +287,9 @@ def hashopen(file, flag='c', mode=0666, pgsize=None, ffactor=None, nelem=None, cachesize=None, lorder=None, hflags=0): flags = _checkflag(flag, file) - e = _openDBEnv() + e = _openDBEnv(cachesize) d = db.DB(e) d.set_flags(hflags) - if cachesize is not None: d.set_cachesize(0, cachesize) if pgsize is not None: d.set_pagesize(pgsize) if lorder is not None: d.set_lorder(lorder) if ffactor is not None: d.set_h_ffactor(ffactor) @@ -305,9 +304,8 @@ def btopen(file, flag='c', mode=0666, pgsize=None, lorder=None): flags = _checkflag(flag, file) - e = _openDBEnv() + e = _openDBEnv(cachesize) d = db.DB(e) - if cachesize is not None: d.set_cachesize(0, cachesize) if pgsize is not None: d.set_pagesize(pgsize) if lorder is not None: d.set_lorder(lorder) d.set_flags(btflags) @@ -324,9 +322,8 @@ def rnopen(file, flag='c', mode=0666, rlen=None, delim=None, source=None, pad=None): flags = _checkflag(flag, file) - e = _openDBEnv() + e = _openDBEnv(cachesize) d = db.DB(e) - if cachesize is not None: d.set_cachesize(0, cachesize) if pgsize is not None: d.set_pagesize(pgsize) if lorder is not None: d.set_lorder(lorder) d.set_flags(rnflags) @@ -339,8 +336,13 @@ def rnopen(file, flag='c', mode=0666, #---------------------------------------------------------------------- -def _openDBEnv(): +def _openDBEnv(cachesize): e = db.DBEnv() + if cachesize is not None: + if cachesize >= 20480: + e.set_cachesize(0, cachesize) + else: + raise error, "cachesize must be >= 20480" e.open('.', db.DB_PRIVATE | db.DB_CREATE | db.DB_THREAD | db.DB_INIT_LOCK | db.DB_INIT_MPOOL) return e @@ -358,7 +360,7 @@ def _checkflag(flag, file): #flags = db.DB_CREATE | db.DB_TRUNCATE # we used db.DB_TRUNCATE flag for this before but BerkeleyDB # 4.2.52 changed to disallowed truncate with txn environments. - if os.path.isfile(file): + if file is not None and os.path.isfile(file): os.unlink(file) else: raise error, "flags should be one of 'r', 'w', 'c' or 'n'" diff --git a/Lib/bsddb/test/test_all.py b/Lib/bsddb/test/test_all.py index 972cd06..abfaf47 100644 --- a/Lib/bsddb/test/test_all.py +++ b/Lib/bsddb/test/test_all.py @@ -65,6 +65,7 @@ def suite(): 'test_join', 'test_lock', 'test_misc', + 'test_pickle', 'test_queue', 'test_recno', 'test_thread', diff --git a/Lib/bsddb/test/test_pickle.py b/Lib/bsddb/test/test_pickle.py new file mode 100644 index 0000000..3916e5c --- /dev/null +++ b/Lib/bsddb/test/test_pickle.py @@ -0,0 +1,75 @@ + +import sys, os, string +import pickle +try: + import cPickle +except ImportError: + cPickle = None +import unittest +import glob + +try: + # For Pythons w/distutils pybsddb + from bsddb3 import db +except ImportError, e: + # For Python 2.3 + from bsddb import db + + +#---------------------------------------------------------------------- + +class pickleTestCase(unittest.TestCase): + """Verify that DBError can be pickled and unpickled""" + db_home = 'db_home' + db_name = 'test-dbobj.db' + + def setUp(self): + homeDir = os.path.join(os.path.dirname(sys.argv[0]), 'db_home') + self.homeDir = homeDir + try: os.mkdir(homeDir) + except os.error: pass + + def tearDown(self): + if hasattr(self, 'db'): + del self.db + if hasattr(self, 'env'): + del self.env + files = glob.glob(os.path.join(self.homeDir, '*')) + for file in files: + os.remove(file) + + def _base_test_pickle_DBError(self, pickle): + self.env = db.DBEnv() + self.env.open(self.homeDir, db.DB_CREATE | db.DB_INIT_MPOOL) + self.db = db.DB(self.env) + self.db.open(self.db_name, db.DB_HASH, db.DB_CREATE) + self.db.put('spam', 'eggs') + assert self.db['spam'] == 'eggs' + try: + self.db.put('spam', 'ham', flags=db.DB_NOOVERWRITE) + except db.DBError, egg: + pickledEgg = pickle.dumps(egg) + #print repr(pickledEgg) + rottenEgg = pickle.loads(pickledEgg) + if rottenEgg.args != egg.args or type(rottenEgg) != type(egg): + raise Exception, (rottenEgg, '!=', egg) + else: + raise Exception, "where's my DBError exception?!?" + + self.db.close() + self.env.close() + + def test01_pickle_DBError(self): + self._base_test_pickle_DBError(pickle=pickle) + + if cPickle: + def test02_cPickle_DBError(self): + self._base_test_pickle_DBError(pickle=cPickle) + +#---------------------------------------------------------------------- + +def test_suite(): + return unittest.makeSuite(pickleTestCase) + +if __name__ == '__main__': + unittest.main(defaultTest='test_suite') diff --git a/Lib/calendar.py b/Lib/calendar.py index 3ffcff5..7800aae 100644 --- a/Lib/calendar.py +++ b/Lib/calendar.py @@ -5,17 +5,32 @@ default, these calendars have Monday as the first day of the week, and Sunday as the last (the European convention). Use setfirstweekday() to set the first day of the week (0=Monday, 6=Sunday).""" -import datetime +from __future__ import with_statement +import sys, datetime, locale -__all__ = ["error","setfirstweekday","firstweekday","isleap", - "leapdays","weekday","monthrange","monthcalendar", - "prmonth","month","prcal","calendar","timegm", - "month_name", "month_abbr", "day_name", "day_abbr", - "weekheader"] +__all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday", + "firstweekday", "isleap", "leapdays", "weekday", "monthrange", + "monthcalendar", "prmonth", "month", "prcal", "calendar", + "timegm", "month_name", "month_abbr", "day_name", "day_abbr"] # Exception raised for bad input (with string parameter for details) error = ValueError +# Exceptions raised for bad input +class IllegalMonthError(ValueError): + def __init__(self, month): + self.month = month + def __str__(self): + return "bad month number %r; must be 1-12" % self.month + + +class IllegalWeekdayError(ValueError): + def __init__(self, weekday): + self.weekday = weekday + def __str__(self): + return "bad weekday number %r; must be 0 (Monday) to 6 (Sunday)" % self.weekday + + # Constants for months referenced later January = 1 February = 2 @@ -30,7 +45,7 @@ mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] class _localized_month: - _months = [datetime.date(2001, i+1, 1).strftime for i in range(12)] + _months = [datetime.date(2001, i+1, 1).strftime for i in xrange(12)] _months.insert(0, lambda x: "") def __init__(self, format): @@ -46,10 +61,11 @@ class _localized_month: def __len__(self): return 13 + class _localized_day: # January 1, 2001, was a Monday. - _days = [datetime.date(2001, 1, i+1).strftime for i in range(7)] + _days = [datetime.date(2001, 1, i+1).strftime for i in xrange(7)] def __init__(self, format): self.format = format @@ -64,6 +80,7 @@ class _localized_day: def __len__(self): return 7 + # Full and abbreviated names of weekdays day_name = _localized_day('%A') day_abbr = _localized_day('%a') @@ -75,23 +92,12 @@ month_abbr = _localized_month('%b') # Constants for weekdays (MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY) = range(7) -_firstweekday = 0 # 0 = Monday, 6 = Sunday - -def firstweekday(): - return _firstweekday - -def setfirstweekday(weekday): - """Set weekday (Monday=0, Sunday=6) to start each week.""" - global _firstweekday - if not MONDAY <= weekday <= SUNDAY: - raise ValueError, \ - 'bad weekday number; must be 0 (Monday) to 6 (Sunday)' - _firstweekday = weekday def isleap(year): """Return 1 for leap years, 0 for non-leap years.""" return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0) + def leapdays(y1, y2): """Return number of leap years in range [y1, y2). Assume y1 <= y2.""" @@ -99,128 +105,501 @@ def leapdays(y1, y2): y2 -= 1 return (y2//4 - y1//4) - (y2//100 - y1//100) + (y2//400 - y1//400) + def weekday(year, month, day): """Return weekday (0-6 ~ Mon-Sun) for year (1970-...), month (1-12), day (1-31).""" return datetime.date(year, month, day).weekday() + def monthrange(year, month): """Return weekday (0-6 ~ Mon-Sun) and number of days (28-31) for year, month.""" if not 1 <= month <= 12: - raise ValueError, 'bad month number' + raise IllegalMonthError(month) day1 = weekday(year, month, 1) ndays = mdays[month] + (month == February and isleap(year)) return day1, ndays -def monthcalendar(year, month): - """Return a matrix representing a month's calendar. - Each row represents a week; days outside this month are zero.""" - day1, ndays = monthrange(year, month) - rows = [] - r7 = range(7) - day = (_firstweekday - day1 + 6) % 7 - 5 # for leading 0's in first week - while day <= ndays: - row = [0, 0, 0, 0, 0, 0, 0] - for i in r7: - if 1 <= day <= ndays: row[i] = day - day = day + 1 - rows.append(row) - return rows - -def prweek(theweek, width): - """Print a single week (no newline).""" - print week(theweek, width), - -def week(theweek, width): - """Returns a single week in a string (no newline).""" - days = [] - for day in theweek: + +class Calendar(object): + """ + Base calendar class. This class doesn't do any formatting. It simply + provides data to subclasses. + """ + + def __init__(self, firstweekday=0): + self.firstweekday = firstweekday # 0 = Monday, 6 = Sunday + + def getfirstweekday(self): + return self._firstweekday % 7 + + def setfirstweekday(self, firstweekday): + self._firstweekday = firstweekday + + firstweekday = property(getfirstweekday, setfirstweekday) + + def iterweekdays(self): + """ + Return a iterator for one week of weekday numbers starting with the + configured first one. + """ + for i in xrange(self.firstweekday, self.firstweekday + 7): + yield i%7 + + def itermonthdates(self, year, month): + """ + Return an iterator for one month. The iterator will yield datetime.date + values and will always iterate through complete weeks, so it will yield + dates outside the specified month. + """ + date = datetime.date(year, month, 1) + # Go back to the beginning of the week + days = (date.weekday() - self.firstweekday) % 7 + date -= datetime.timedelta(days=days) + oneday = datetime.timedelta(days=1) + while True: + yield date + date += oneday + if date.month != month and date.weekday() == self.firstweekday: + break + + def itermonthdays2(self, year, month): + """ + Like itermonthdates(), but will yield (day number, weekday number) + tuples. For days outside the specified month the day number is 0. + """ + for date in self.itermonthdates(year, month): + if date.month != month: + yield (0, date.weekday()) + else: + yield (date.day, date.weekday()) + + def itermonthdays(self, year, month): + """ + Like itermonthdates(), but will yield day numbers tuples. For days + outside the specified month the day number is 0. + """ + for date in self.itermonthdates(year, month): + if date.month != month: + yield 0 + else: + yield date.day + + def monthdatescalendar(self, year, month): + """ + Return a matrix (list of lists) representing a month's calendar. + Each row represents a week; week entries are datetime.date values. + """ + dates = list(self.itermonthdates(year, month)) + return [ dates[i:i+7] for i in xrange(0, len(dates), 7) ] + + def monthdays2calendar(self, year, month): + """ + Return a matrix representing a month's calendar. + Each row represents a week; week entries are + (day number, weekday number) tuples. Day numbers outside this month + are zero. + """ + days = list(self.itermonthdays2(year, month)) + return [ days[i:i+7] for i in xrange(0, len(days), 7) ] + + def monthdayscalendar(self, year, month): + """ + Return a matrix representing a month's calendar. + Each row represents a week; days outside this month are zero. + """ + days = list(self.itermonthdays(year, month)) + return [ days[i:i+7] for i in xrange(0, len(days), 7) ] + + def yeardatescalendar(self, year, width=3): + """ + Return the data for the specified year ready for formatting. The return + value is a list of month rows. Each month row contains upto width months. + Each month contains between 4 and 6 weeks and each week contains 1-7 + days. Days are datetime.date objects. + """ + months = [ + self.monthdatescalendar(year, i) + for i in xrange(January, January+12) + ] + return [months[i:i+width] for i in xrange(0, len(months), width) ] + + def yeardays2calendar(self, year, width=3): + """ + Return the data for the specified year ready for formatting (similar to + yeardatescalendar()). Entries in the week lists are + (day number, weekday number) tuples. Day numbers outside this month are + zero. + """ + months = [ + self.monthdays2calendar(year, i) + for i in xrange(January, January+12) + ] + return [months[i:i+width] for i in xrange(0, len(months), width) ] + + def yeardayscalendar(self, year, width=3): + """ + Return the data for the specified year ready for formatting (similar to + yeardatescalendar()). Entries in the week lists are day numbers. + Day numbers outside this month are zero. + """ + months = [ + self.monthdayscalendar(year, i) + for i in xrange(January, January+12) + ] + return [months[i:i+width] for i in xrange(0, len(months), width) ] + + +class TextCalendar(Calendar): + """ + Subclass of Calendar that outputs a calendar as a simple plain text + similar to the UNIX program cal. + """ + + def prweek(self, theweek, width): + """ + Print a single week (no newline). + """ + print self.week(theweek, width), + + def formatday(self, day, weekday, width): + """ + Returns a formatted day. + """ if day == 0: s = '' else: s = '%2i' % day # right-align single-digit days - days.append(s.center(width)) - return ' '.join(days) - -def weekheader(width): - """Return a header for a week.""" - if width >= 9: - names = day_name - else: - names = day_abbr - days = [] - for i in range(_firstweekday, _firstweekday + 7): - days.append(names[i%7][:width].center(width)) - return ' '.join(days) - -def prmonth(theyear, themonth, w=0, l=0): - """Print a month's calendar.""" - print month(theyear, themonth, w, l), - -def month(theyear, themonth, w=0, l=0): - """Return a month's calendar string (multi-line).""" - w = max(2, w) - l = max(1, l) - s = ("%s %r" % (month_name[themonth], theyear)).center( - 7 * (w + 1) - 1).rstrip() + \ - '\n' * l + weekheader(w).rstrip() + '\n' * l - for aweek in monthcalendar(theyear, themonth): - s = s + week(aweek, w).rstrip() + '\n' * l - return s[:-l] + '\n' - -# Spacing of month columns for 3-column year calendar + return s.center(width) + + def formatweek(self, theweek, width): + """ + Returns a single week in a string (no newline). + """ + return ' '.join(self.formatday(d, wd, width) for (d, wd) in theweek) + + def formatweekday(self, day, width): + """ + Returns a formatted week day name. + """ + if width >= 9: + names = day_name + else: + names = day_abbr + return names[day][:width].center(width) + + def formatweekheader(self, width): + """ + Return a header for a week. + """ + return ' '.join(self.formatweekday(i, width) for i in self.iterweekdays()) + + def formatmonthname(self, theyear, themonth, width, withyear=True): + """ + Return a formatted month name. + """ + s = month_name[themonth] + if withyear: + s = "%s %r" % (s, theyear) + return s.center(width) + + def prmonth(self, theyear, themonth, w=0, l=0): + """ + Print a month's calendar. + """ + print self.formatmonth(theyear, themonth, w, l), + + def formatmonth(self, theyear, themonth, w=0, l=0): + """ + Return a month's calendar string (multi-line). + """ + w = max(2, w) + l = max(1, l) + s = self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1) + s = s.rstrip() + s += '\n' * l + s += self.formatweekheader(w).rstrip() + s += '\n' * l + for week in self.monthdays2calendar(theyear, themonth): + s += self.formatweek(week, w).rstrip() + s += '\n' * l + return s + + def formatyear(self, theyear, w=2, l=1, c=6, m=3): + """ + Returns a year's calendar as a multi-line string. + """ + w = max(2, w) + l = max(1, l) + c = max(2, c) + colwidth = (w + 1) * 7 - 1 + v = [] + a = v.append + a(repr(theyear).center(colwidth*m+c*(m-1)).rstrip()) + a('\n'*l) + header = self.formatweekheader(w) + for (i, row) in enumerate(self.yeardays2calendar(theyear, m)): + # months in this row + months = xrange(m*i+1, min(m*(i+1)+1, 13)) + a('\n'*l) + names = (self.formatmonthname(theyear, k, colwidth, False) + for k in months) + a(formatstring(names, colwidth, c).rstrip()) + a('\n'*l) + headers = (header for k in months) + a(formatstring(headers, colwidth, c).rstrip()) + a('\n'*l) + # max number of weeks for this row + height = max(len(cal) for cal in row) + for j in xrange(height): + weeks = [] + for cal in row: + if j >= len(cal): + weeks.append('') + else: + weeks.append(self.formatweek(cal[j], w)) + a(formatstring(weeks, colwidth, c).rstrip()) + a('\n' * l) + return ''.join(v) + + def pryear(self, theyear, w=0, l=0, c=6, m=3): + """Print a year's calendar.""" + print self.formatyear(theyear, w, l, c, m) + + +class HTMLCalendar(Calendar): + """ + This calendar returns complete HTML pages. + """ + + # CSS classes for the day s + cssclasses = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"] + + def formatday(self, day, weekday): + """ + Return a day as a table cell. + """ + if day == 0: + return ' ' # day outside month + else: + return '%d' % (self.cssclasses[weekday], day) + + def formatweek(self, theweek): + """ + Return a complete week as a table row. + """ + s = ''.join(self.formatday(d, wd) for (d, wd) in theweek) + return '%s' % s + + def formatweekday(self, day): + """ + Return a weekday name as a table header. + """ + return '%s' % (self.cssclasses[day], day_abbr[day]) + + def formatweekheader(self): + """ + Return a header for a week as a table row. + """ + s = ''.join(self.formatweekday(i) for i in self.iterweekdays()) + return '%s' % s + + def formatmonthname(self, theyear, themonth, withyear=True): + """ + Return a month name as a table row. + """ + if withyear: + s = '%s %s' % (month_name[themonth], theyear) + else: + s = '%s' % month_name[themonth] + return '%s' % s + + def formatmonth(self, theyear, themonth, withyear=True): + """ + Return a formatted month as a table. + """ + v = [] + a = v.append + a('') + a('\n') + a(self.formatmonthname(theyear, themonth, withyear=withyear)) + a('\n') + a(self.formatweekheader()) + a('\n') + for week in self.monthdays2calendar(theyear, themonth): + a(self.formatweek(week)) + a('\n') + a('
') + a('\n') + return ''.join(v) + + def formatyear(self, theyear, width=3): + """ + Return a formatted year as a table of tables. + """ + v = [] + a = v.append + width = max(width, 1) + a('') + a('\n') + a('' % (width, theyear)) + for i in xrange(January, January+12, width): + # months in this row + months = xrange(i, min(i+width, 13)) + a('') + for m in months: + a('') + a('') + a('
%s
') + a(self.formatmonth(theyear, m, withyear=False)) + a('
') + return ''.join(v) + + def formatyearpage(self, theyear, width=3, css='calendar.css', encoding=None): + """ + Return a formatted year as a complete HTML page. + """ + if encoding is None: + encoding = sys.getdefaultencoding() + v = [] + a = v.append + a('\n' % encoding) + a('\n') + a('\n') + a('\n') + a('\n' % encoding) + if css is not None: + a('\n' % css) + a('Calendar for %d</title\n' % theyear) + a('</head>\n') + a('<body>\n') + a(self.formatyear(theyear, width)) + a('</body>\n') + a('</html>\n') + return ''.join(v).encode(encoding, "xmlcharrefreplace") + + +class TimeEncoding: + def __init__(self, locale): + self.locale = locale + + def __context__(self): + return self + + def __enter__(self): + self.oldlocale = locale.setlocale(locale.LC_TIME, self.locale) + return locale.getlocale(locale.LC_TIME)[1] + + def __exit__(self, *args): + locale.setlocale(locale.LC_TIME, self.oldlocale) + + +class LocaleTextCalendar(TextCalendar): + """ + This class can be passed a locale name in the constructor and will return + month and weekday names in the specified locale. If this locale includes + an encoding all strings containing month and weekday names will be returned + as unicode. + """ + + def __init__(self, firstweekday=0, locale=None): + TextCalendar.__init__(self, firstweekday) + if locale is None: + locale = locale.getdefaultlocale() + self.locale = locale + + def formatweekday(self, day, width): + with TimeEncoding(self.locale) as encoding: + if width >= 9: + names = day_name + else: + names = day_abbr + name = names[day] + if encoding is not None: + name = name.decode(encoding) + return name[:width].center(width) + + def formatmonthname(self, theyear, themonth, width, withyear=True): + with TimeEncoding(self.locale) as encoding: + s = month_name[themonth] + if encoding is not None: + s = s.decode(encoding) + if withyear: + s = "%s %r" % (s, theyear) + return s.center(width) + + +class LocaleHTMLCalendar(HTMLCalendar): + """ + This class can be passed a locale name in the constructor and will return + month and weekday names in the specified locale. If this locale includes + an encoding all strings containing month and weekday names will be returned + as unicode. + """ + def __init__(self, firstweekday=0, locale=None): + HTMLCalendar.__init__(self, firstweekday) + if locale is None: + locale = locale.getdefaultlocale() + self.locale = locale + + def formatweekday(self, day): + with TimeEncoding(self.locale) as encoding: + s = day_abbr[day] + if encoding is not None: + s = s.decode(encoding) + return '<th class="%s">%s</th>' % (self.cssclasses[day], s) + + def formatmonthname(self, theyear, themonth, withyear=True): + with TimeEncoding(self.locale) as encoding: + s = month_name[themonth] + if encoding is not None: + s = s.decode(encoding) + if withyear: + s = '%s %s' % (s, theyear) + return '<tr><th colspan="7" class="month">%s</th></tr>' % s + + +# Support for old module level interface +c = TextCalendar() + +firstweekday = c.getfirstweekday + +def setfirstweekday(firstweekday): + if not MONDAY <= firstweekday <= SUNDAY: + raise IllegalWeekdayError(firstweekday) + c.firstweekday = firstweekday + +monthcalendar = c.monthdayscalendar +prweek = c.prweek +week = c.formatweek +weekheader = c.formatweekheader +prmonth = c.prmonth +month = c.formatmonth +calendar = c.formatyear +prcal = c.pryear + + +# Spacing of month columns for multi-column year calendar _colwidth = 7*3 - 1 # Amount printed by prweek() _spacing = 6 # Number of spaces between columns -def format3c(a, b, c, colwidth=_colwidth, spacing=_spacing): - """Prints 3-column formatting for year calendars""" - print format3cstring(a, b, c, colwidth, spacing) - -def format3cstring(a, b, c, colwidth=_colwidth, spacing=_spacing): - """Returns a string formatted from 3 strings, centered within 3 columns.""" - return (a.center(colwidth) + ' ' * spacing + b.center(colwidth) + - ' ' * spacing + c.center(colwidth)) - -def prcal(year, w=0, l=0, c=_spacing): - """Print a year's calendar.""" - print calendar(year, w, l, c), - -def calendar(year, w=0, l=0, c=_spacing): - """Returns a year's calendar as a multi-line string.""" - w = max(2, w) - l = max(1, l) - c = max(2, c) - colwidth = (w + 1) * 7 - 1 - s = repr(year).center(colwidth * 3 + c * 2).rstrip() + '\n' * l - header = weekheader(w) - header = format3cstring(header, header, header, colwidth, c).rstrip() - for q in range(January, January+12, 3): - s = (s + '\n' * l + - format3cstring(month_name[q], month_name[q+1], month_name[q+2], - colwidth, c).rstrip() + - '\n' * l + header + '\n' * l) - data = [] - height = 0 - for amonth in range(q, q + 3): - cal = monthcalendar(year, amonth) - if len(cal) > height: - height = len(cal) - data.append(cal) - for i in range(height): - weeks = [] - for cal in data: - if i >= len(cal): - weeks.append('') - else: - weeks.append(week(cal[i], w)) - s = s + format3cstring(weeks[0], weeks[1], weeks[2], - colwidth, c).rstrip() + '\n' * l - return s[:-l] + '\n' + +def format(cols, colwidth=_colwidth, spacing=_spacing): + """Prints multi-column formatting for year calendars""" + print formatstring(cols, colwidth, spacing) + + +def formatstring(cols, colwidth=_colwidth, spacing=_spacing): + """Returns a string formatted from n strings, centered within n columns.""" + spacing *= ' ' + return spacing.join(c.center(colwidth) for c in cols) + EPOCH = 1970 _EPOCH_ORD = datetime.date(EPOCH, 1, 1).toordinal() + def timegm(tuple): """Unrelated but handy function to calculate Unix timestamp from GMT.""" year, month, day, hour, minute, second = tuple[:6] @@ -229,3 +608,97 @@ def timegm(tuple): minutes = hours*60 + minute seconds = minutes*60 + second return seconds + + +def main(args): + import optparse + parser = optparse.OptionParser(usage="usage: %prog [options] [year [month]]") + parser.add_option( + "-w", "--width", + dest="width", type="int", default=2, + help="width of date column (default 2, text only)" + ) + parser.add_option( + "-l", "--lines", + dest="lines", type="int", default=1, + help="number of lines for each week (default 1, text only)" + ) + parser.add_option( + "-s", "--spacing", + dest="spacing", type="int", default=6, + help="spacing between months (default 6, text only)" + ) + parser.add_option( + "-m", "--months", + dest="months", type="int", default=3, + help="months per row (default 3, text only)" + ) + parser.add_option( + "-c", "--css", + dest="css", default="calendar.css", + help="CSS to use for page (html only)" + ) + parser.add_option( + "-L", "--locale", + dest="locale", default=None, + help="locale to be used from month and weekday names" + ) + parser.add_option( + "-e", "--encoding", + dest="encoding", default=None, + help="Encoding to use for output" + ) + parser.add_option( + "-t", "--type", + dest="type", default="text", + choices=("text", "html"), + help="output type (text or html)" + ) + + (options, args) = parser.parse_args(args) + + if options.locale and not options.encoding: + parser.error("if --locale is specified --encoding is required") + sys.exit(1) + + if options.type == "html": + if options.locale: + cal = LocaleHTMLCalendar(locale=options.locale) + else: + cal = HTMLCalendar() + encoding = options.encoding + if encoding is None: + encoding = sys.getdefaultencoding() + optdict = dict(encoding=encoding, css=options.css) + if len(args) == 1: + print cal.formatyearpage(datetime.date.today().year, **optdict) + elif len(args) == 2: + print cal.formatyearpage(int(args[1]), **optdict) + else: + parser.error("incorrect number of arguments") + sys.exit(1) + else: + if options.locale: + cal = LocaleTextCalendar(locale=options.locale) + else: + cal = TextCalendar() + optdict = dict(w=options.width, l=options.lines) + if len(args) != 3: + optdict["c"] = options.spacing + optdict["m"] = options.months + if len(args) == 1: + result = cal.formatyear(datetime.date.today().year, **optdict) + elif len(args) == 2: + result = cal.formatyear(int(args[1]), **optdict) + elif len(args) == 3: + result = cal.formatmonth(int(args[1]), int(args[2]), **optdict) + else: + parser.error("incorrect number of arguments") + sys.exit(1) + if options.encoding: + result = result.encode(options.encoding) + print result + + +if __name__ == "__main__": + main(sys.argv) diff --git a/Lib/codecs.py b/Lib/codecs.py index 28856c7..1518d75 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -14,8 +14,7 @@ import __builtin__, sys try: from _codecs import * except ImportError, why: - raise SystemError,\ - 'Failed to load the builtin codecs: %s' % why + raise SystemError('Failed to load the builtin codecs: %s' % why) __all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE", "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE", @@ -156,13 +155,13 @@ class Codec: class IncrementalEncoder(object): """ - A IncrementalEncoder encodes an input in multiple steps. The input can be + An IncrementalEncoder encodes an input in multiple steps. The input can be passed piece by piece to the encode() method. The IncrementalEncoder remembers the state of the Encoding process between calls to encode(). """ def __init__(self, errors='strict'): """ - Creates a IncrementalEncoder instance. + Creates an IncrementalEncoder instance. The IncrementalEncoder may use different error handling schemes by providing the errors keyword argument. See the module docstring @@ -182,6 +181,33 @@ class IncrementalEncoder(object): Resets the encoder to the initial state. """ +class BufferedIncrementalEncoder(IncrementalEncoder): + """ + This subclass of IncrementalEncoder can be used as the baseclass for an + incremental encoder if the encoder must keep some of the output in a + buffer between calls to encode(). + """ + def __init__(self, errors='strict'): + IncrementalEncoder.__init__(self, errors) + self.buffer = "" # unencoded input that is kept between calls to encode() + + def _buffer_encode(self, input, errors, final): + # Overwrite this method in subclasses: It must encode input + # and return an (output, length consumed) tuple + raise NotImplementedError + + def encode(self, input, final=False): + # encode input (taking the buffer into account) + data = self.buffer + input + (result, consumed) = self._buffer_encode(data, self.errors, final) + # keep unencoded input until the next call + self.buffer = data[consumed:] + return result + + def reset(self): + IncrementalEncoder.reset(self) + self.buffer = "" + class IncrementalDecoder(object): """ An IncrementalDecoder decodes an input in multiple steps. The input can be @@ -234,7 +260,7 @@ class BufferedIncrementalDecoder(IncrementalDecoder): def reset(self): IncrementalDecoder.reset(self) - self.bytebuffer = "" + self.buffer = "" # # The StreamWriter and StreamReader class provide generic working diff --git a/Lib/contextlib.py b/Lib/contextlib.py index 0a5d608..aa5335d 100644 --- a/Lib/contextlib.py +++ b/Lib/contextlib.py @@ -30,9 +30,22 @@ class GeneratorContextManager(object): else: try: self.gen.throw(type, value, traceback) - return True - except StopIteration: - return True + raise RuntimeError("generator didn't stop after throw()") + except StopIteration, exc: + # Suppress the exception *unless* it's the same exception that + # was passed to throw(). This prevents a StopIteration + # raised inside the "with" statement from being suppressed + return exc is not value + except: + # only re-raise if it's *not* the exception that was + # passed to throw(), because __exit__() must not raise + # an exception unless __exit__() itself failed. But throw() + # has to raise the exception to signal propagation, so this + # fixes the impedance mismatch between the throw() protocol + # and the __exit__() protocol. + # + if sys.exc_info()[1] is not value: + raise def contextmanager(func): @@ -68,6 +81,7 @@ def contextmanager(func): try: helper.__name__ = func.__name__ helper.__doc__ = func.__doc__ + helper.__dict__ = func.__dict__ except: pass return helper diff --git a/Lib/copy_reg.py b/Lib/copy_reg.py index 169520d..f4661ed 100644 --- a/Lib/copy_reg.py +++ b/Lib/copy_reg.py @@ -111,8 +111,19 @@ def _slotnames(cls): # Slots found -- gather slot names from all base classes for c in cls.__mro__: if "__slots__" in c.__dict__: - names += [name for name in c.__dict__["__slots__"] - if name not in ("__dict__", "__weakref__")] + slots = c.__dict__['__slots__'] + # if class has a single slot, it can be given as a string + if isinstance(slots, basestring): + slots = (slots,) + for name in slots: + # special descriptors + if name in ("__dict__", "__weakref__"): + continue + # mangled names + elif name.startswith('__') and not name.endswith('__'): + names.append('_%s%s' % (c.__name__, name)) + else: + names.append(name) # Cache the outcome in the class if at all possible try: diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py index dd0f640..28ac180 100644 --- a/Lib/ctypes/__init__.py +++ b/Lib/ctypes/__init__.py @@ -9,11 +9,7 @@ from _ctypes import Union, Structure, Array from _ctypes import _Pointer from _ctypes import CFuncPtr as _CFuncPtr from _ctypes import __version__ as _ctypes_version -try: - from _ctypes import RTLD_LOCAL, RTLD_GLOBAL -except (ImportError, AttributeError): - RTLD_GLOBAL = RTLD_LOCAL = None - +from _ctypes import RTLD_LOCAL, RTLD_GLOBAL from _ctypes import ArgumentError from struct import calcsize as _calcsize @@ -304,10 +300,11 @@ class CDLL(object): raise AttributeError, name return self.__getitem__(name) - def __getitem__(self, name): - func = self._FuncPtr(name, self) - func.__name__ = name - setattr(self, name, func) + def __getitem__(self, name_or_ordinal): + func = self._FuncPtr((name_or_ordinal, self)) + if not isinstance(name_or_ordinal, (int, long)): + func.__name__ = name_or_ordinal + setattr(self, name_or_ordinal, func) return func class PyDLL(CDLL): @@ -384,21 +381,29 @@ if _os.name in ("nt", "ce"): _pointer_type_cache[None] = c_void_p -# functions - -from _ctypes import _memmove_addr, _memset_addr, _string_at_addr, cast - if sizeof(c_uint) == sizeof(c_void_p): c_size_t = c_uint elif sizeof(c_ulong) == sizeof(c_void_p): c_size_t = c_ulong +# functions + +from _ctypes import _memmove_addr, _memset_addr, _string_at_addr, _cast_addr + ## void *memmove(void *, const void *, size_t); memmove = CFUNCTYPE(c_void_p, c_void_p, c_void_p, c_size_t)(_memmove_addr) ## void *memset(void *, int, size_t) memset = CFUNCTYPE(c_void_p, c_void_p, c_int, c_size_t)(_memset_addr) +def PYFUNCTYPE(restype, *argtypes): + class CFunctionType(_CFuncPtr): + _argtypes_ = argtypes + _restype_ = restype + _flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI + return CFunctionType +cast = PYFUNCTYPE(py_object, c_void_p, py_object)(_cast_addr) + _string_at = CFUNCTYPE(py_object, c_void_p, c_int)(_string_at_addr) def string_at(ptr, size=0): """string_at(addr[, size]) -> string diff --git a/Lib/ctypes/_loader.py b/Lib/ctypes/_loader.py index 7bde6c6..7a48c1c 100644 --- a/Lib/ctypes/_loader.py +++ b/Lib/ctypes/_loader.py @@ -1,14 +1,11 @@ -# WORK IN PROGRESS! DO NOT (yet) USE! import sys, os import ctypes -__all__ = ["LibraryLoader", "RTLD_LOCAL", "RTLD_GLOBAL"] - if os.name in ("nt", "ce"): from _ctypes import LoadLibrary as dlopen - RTLD_LOCAL = RTLD_GLOBAL = None else: - from _ctypes import dlopen, RTLD_LOCAL, RTLD_GLOBAL + from _ctypes import dlopen +from _ctypes import RTLD_LOCAL, RTLD_GLOBAL # _findLib(name) returns an iterable of possible names for a library. if os.name in ("nt", "ce"): @@ -56,7 +53,10 @@ elif os.name == "posix": expr = '/[^\(\)\s]*lib%s\.[^\(\)\s]*' % name res = re.search(expr, os.popen('/sbin/ldconfig -p 2>/dev/null').read()) if not res: - return None + cmd = 'ldd %s 2>/dev/null' % sys.executable + res = re.search(expr, os.popen(cmd).read()) + if not res: + return None return res.group(0) def _get_soname(f): diff --git a/Lib/ctypes/test/test_byteswap.py b/Lib/ctypes/test/test_byteswap.py index 1b31f90..d0ada40 100644 --- a/Lib/ctypes/test/test_byteswap.py +++ b/Lib/ctypes/test/test_byteswap.py @@ -149,7 +149,7 @@ class Test(unittest.TestCase): self.failUnless(c_char.__ctype_le__ is c_char) self.failUnless(c_char.__ctype_be__ is c_char) - def test_struct_fields(self): + def test_struct_fields_1(self): if sys.byteorder == "little": base = BigEndianStructure else: @@ -198,17 +198,20 @@ class Test(unittest.TestCase): pass self.assertRaises(TypeError, setattr, S, "_fields_", [("s", T)]) - # crashes on solaris with a core dump. - def X_test_struct_fields(self): + def test_struct_fields_2(self): + # standard packing in struct uses no alignment. + # So, we have to align using pad bytes. + # + # Unaligned accesses will crash Python (on those platforms that + # don't allow it, like sparc solaris). if sys.byteorder == "little": base = BigEndianStructure - fmt = ">bhid" + fmt = ">bxhid" else: base = LittleEndianStructure - fmt = "<bhid" + fmt = "<bxhid" class S(base): - _pack_ = 1 # struct with '<' or '>' uses standard alignment. _fields_ = [("b", c_byte), ("h", c_short), ("i", c_int), @@ -218,5 +221,60 @@ class Test(unittest.TestCase): s2 = struct.pack(fmt, 0x12, 0x1234, 0x12345678, 3.14) self.failUnlessEqual(bin(s1), bin(s2)) + def test_unaligned_nonnative_struct_fields(self): + if sys.byteorder == "little": + base = BigEndianStructure + fmt = ">b h xi xd" + else: + base = LittleEndianStructure + fmt = "<b h xi xd" + + class S(base): + _pack_ = 1 + _fields_ = [("b", c_byte), + + ("h", c_short), + + ("_1", c_byte), + ("i", c_int), + + ("_2", c_byte), + ("d", c_double)] + + s1 = S() + s1.b = 0x12 + s1.h = 0x1234 + s1.i = 0x12345678 + s1.d = 3.14 + s2 = struct.pack(fmt, 0x12, 0x1234, 0x12345678, 3.14) + self.failUnlessEqual(bin(s1), bin(s2)) + + def test_unaligned_native_struct_fields(self): + if sys.byteorder == "little": + fmt = "<b h xi xd" + else: + base = LittleEndianStructure + fmt = ">b h xi xd" + + class S(Structure): + _pack_ = 1 + _fields_ = [("b", c_byte), + + ("h", c_short), + + ("_1", c_byte), + ("i", c_int), + + ("_2", c_byte), + ("d", c_double)] + + s1 = S() + s1.b = 0x12 + s1.h = 0x1234 + s1.i = 0x12345678 + s1.d = 3.14 + s2 = struct.pack(fmt, 0x12, 0x1234, 0x12345678, 3.14) + self.failUnlessEqual(bin(s1), bin(s2)) + if __name__ == "__main__": unittest.main() diff --git a/Lib/ctypes/test/test_cfuncs.py b/Lib/ctypes/test/test_cfuncs.py index 7c2b28b..6e0798d 100644 --- a/Lib/ctypes/test/test_cfuncs.py +++ b/Lib/ctypes/test/test_cfuncs.py @@ -179,7 +179,7 @@ else: def __getattr__(self, name): if name[:2] == '__' and name[-2:] == '__': raise AttributeError, name - func = self._FuncPtr("s_" + name, self) + func = self._FuncPtr(("s_" + name, self)) setattr(self, name, func) return func diff --git a/Lib/ctypes/test/test_keeprefs.py b/Lib/ctypes/test/test_keeprefs.py index 39e70e3..7318f29 100644 --- a/Lib/ctypes/test/test_keeprefs.py +++ b/Lib/ctypes/test/test_keeprefs.py @@ -140,5 +140,10 @@ class PointerToStructure(unittest.TestCase): r.a[0].x = 42 r.a[0].y = 99 + # to avoid leaking when tests are run several times + # clean up the types left in the cache. + from ctypes import _pointer_type_cache + del _pointer_type_cache[POINT] + if __name__ == "__main__": unittest.main() diff --git a/Lib/ctypes/test/test_loading.py b/Lib/ctypes/test/test_loading.py index 80564b8..4558417 100644 --- a/Lib/ctypes/test/test_loading.py +++ b/Lib/ctypes/test/test_loading.py @@ -2,40 +2,72 @@ from ctypes import * import sys, unittest import os, StringIO +libc_name = None +if os.name == "nt": + libc_name = "msvcrt" +elif os.name == "ce": + libc_name = "coredll" +elif sys.platform == "darwin": + libc_name = "libc.dylib" +elif sys.platform == "cygwin": + libc_name = "cygwin1.dll" +else: + for line in os.popen("ldd %s" % sys.executable): + if "libc.so" in line: + if sys.platform == "openbsd3": + libc_name = line.split()[4] + else: + libc_name = line.split()[2] +## print "libc_name is", libc_name + break + class LoaderTest(unittest.TestCase): unknowndll = "xxrandomnamexx" - def test_load(self): - if os.name == "nt": - name = "msvcrt" - elif os.name == "ce": - name = "coredll" - elif sys.platform == "darwin": - name = "libc.dylib" - elif sys.platform.startswith("freebsd"): - name = "libc.so" - elif sys.platform == "sunos5": - name = "libc.so" - else: - name = "libc.so.6" - cdll.load(name) - self.assertRaises(OSError, cdll.load, self.unknowndll) - - def test_load_version(self): - version = "6" - name = "c" - if sys.platform == "linux2": - cdll.load_version(name, version) + if libc_name is not None: + def test_load(self): + cdll.load(libc_name) + cdll.load(os.path.basename(libc_name)) + self.assertRaises(OSError, cdll.load, self.unknowndll) + + if libc_name is not None and os.path.basename(libc_name) == "libc.so.6": + def test_load_version(self): + cdll.load_version("c", "6") # linux uses version, libc 9 should not exist - self.assertRaises(OSError, cdll.load_version, name, "9") - self.assertRaises(OSError, cdll.load_version, self.unknowndll, "") + self.assertRaises(OSError, cdll.load_version, "c", "9") + self.assertRaises(OSError, cdll.load_version, self.unknowndll, "") - if os.name == "posix" and sys.platform != "sunos5": def test_find(self): name = "c" cdll.find(name) self.assertRaises(OSError, cdll.find, self.unknowndll) + if os.name in ("nt", "ce"): + def test_load_library(self): + if os.name == "nt": + windll.load_library("kernel32").GetModuleHandleW + windll.LoadLibrary("kernel32").GetModuleHandleW + WinDLL("kernel32").GetModuleHandleW + elif os.name == "ce": + windll.load_library("coredll").GetModuleHandleW + windll.LoadLibrary("coredll").GetModuleHandleW + WinDLL("coredll").GetModuleHandleW + + def test_load_ordinal_functions(self): + import _ctypes_test + dll = WinDLL(_ctypes_test.__file__) + # We load the same function both via ordinal and name + func_ord = dll[2] + func_name = dll.GetString + # addressof gets the address where the function pointer is stored + a_ord = addressof(func_ord) + a_name = addressof(func_name) + f_ord_addr = c_void_p.from_address(a_ord).value + f_name_addr = c_void_p.from_address(a_name).value + self.failUnlessEqual(hex(f_ord_addr), hex(f_name_addr)) + + self.failUnlessRaises(AttributeError, dll.__getitem__, 1234) + if __name__ == "__main__": unittest.main() diff --git a/Lib/ctypes/test/test_pointers.py b/Lib/ctypes/test/test_pointers.py index 6172abb..3a324a6 100644 --- a/Lib/ctypes/test/test_pointers.py +++ b/Lib/ctypes/test/test_pointers.py @@ -166,6 +166,18 @@ class PointersTestCase(unittest.TestCase): result = func( byref(argc), argv ) assert result == 'world', result + def test_bug_1467852(self): + # http://sourceforge.net/tracker/?func=detail&atid=532154&aid=1467852&group_id=71702 + x = c_int(5) + dummy = [] + for i in range(32000): + dummy.append(c_int(i)) + y = c_int(6) + p = pointer(x) + pp = pointer(p) + q = pointer(y) + pp[0] = q # <== + self.failUnlessEqual(p[0], 6) if __name__ == '__main__': unittest.main() diff --git a/Lib/ctypes/test/test_posix.py b/Lib/ctypes/test/test_posix.py index 2b4fdff..fe0a40a 100644 --- a/Lib/ctypes/test/test_posix.py +++ b/Lib/ctypes/test/test_posix.py @@ -8,8 +8,10 @@ if os.name == "posix" and sys.platform == "linux2": class TestRTLD_GLOBAL(unittest.TestCase): def test_GL(self): - cdll.load('libGL.so', mode=RTLD_GLOBAL) - cdll.load('libGLU.so') + if os.path.exists('/usr/lib/libGL.so'): + cdll.load('libGL.so', mode=RTLD_GLOBAL) + if os.path.exists('/usr/lib/libGLU.so'): + cdll.load('libGLU.so') ##if os.name == "posix" and sys.platform != "darwin": diff --git a/Lib/ctypes/test/test_prototypes.py b/Lib/ctypes/test/test_prototypes.py index 2c3d75b..47f5da1 100644 --- a/Lib/ctypes/test/test_prototypes.py +++ b/Lib/ctypes/test/test_prototypes.py @@ -24,6 +24,19 @@ import unittest import _ctypes_test testdll = cdll.load(_ctypes_test.__file__) +# Return machine address `a` as a (possibly long) non-negative integer. +# Starting with Python 2.5, id(anything) is always non-negative, and +# the ctypes addressof() inherits that via PyLong_FromVoidPtr(). +def positive_address(a): + if a >= 0: + return a + # View the bits in `a` as unsigned instead. + import struct + num_bits = struct.calcsize("P") * 8 # num bits in native machine address + a += 1L << num_bits + assert a >= 0 + return a + def c_wbuffer(init): n = len(init) + 1 return (c_wchar * n)(*init) @@ -43,7 +56,8 @@ class CharPointersTestCase(unittest.TestCase): ci = c_int(0) func.argtypes = POINTER(c_int), - self.failUnlessEqual(addressof(ci), func(byref(ci))) + self.failUnlessEqual(positive_address(addressof(ci)), + positive_address(func(byref(ci)))) func.argtypes = c_char_p, self.assertRaises(ArgumentError, func, byref(ci)) diff --git a/Lib/ctypes/test/test_random_things.py b/Lib/ctypes/test/test_random_things.py index cd50ca8..78a665b 100644 --- a/Lib/ctypes/test/test_random_things.py +++ b/Lib/ctypes/test/test_random_things.py @@ -51,16 +51,14 @@ class CallbackTracbackTestCase(unittest.TestCase): def test_IntegerDivisionError(self): cb = CFUNCTYPE(c_int, c_int)(callback_func) out = self.capture_stderr(cb, 0) - self.failUnlessEqual(out.splitlines()[-1], - "ZeroDivisionError: " - "integer division or modulo by zero") + self.failUnlessEqual(out.splitlines()[-1][:19], + "ZeroDivisionError: ") def test_FloatDivisionError(self): cb = CFUNCTYPE(c_int, c_double)(callback_func) out = self.capture_stderr(cb, 0.0) - self.failUnlessEqual(out.splitlines()[-1], - "ZeroDivisionError: " - "float division") + self.failUnlessEqual(out.splitlines()[-1][:19], + "ZeroDivisionError: ") def test_TypeErrorDivisionError(self): cb = CFUNCTYPE(c_int, c_char_p)(callback_func) diff --git a/Lib/ctypes/test/test_sizes.py b/Lib/ctypes/test/test_sizes.py index 6fb9ca0..208c00e 100644 --- a/Lib/ctypes/test/test_sizes.py +++ b/Lib/ctypes/test/test_sizes.py @@ -20,5 +20,8 @@ class SizesTestCase(unittest.TestCase): self.failUnlessEqual(8, sizeof(c_int64)) self.failUnlessEqual(8, sizeof(c_uint64)) + def test_size_t(self): + self.failUnlessEqual(sizeof(c_void_p), sizeof(c_size_t)) + if __name__ == "__main__": unittest.main() diff --git a/Lib/ctypes/test/test_unaligned_structures.py b/Lib/ctypes/test/test_unaligned_structures.py new file mode 100644 index 0000000..89343ba --- /dev/null +++ b/Lib/ctypes/test/test_unaligned_structures.py @@ -0,0 +1,45 @@ +import sys, unittest +from ctypes import * + +structures = [] +byteswapped_structures = [] + + +if sys.byteorder == "little": + SwappedStructure = BigEndianStructure +else: + SwappedStructure = LittleEndianStructure + +for typ in [c_short, c_int, c_long, c_longlong, + c_float, c_double, + c_ushort, c_uint, c_ulong, c_ulonglong]: + class X(Structure): + _pack_ = 1 + _fields_ = [("pad", c_byte), + ("value", typ)] + class Y(SwappedStructure): + _pack_ = 1 + _fields_ = [("pad", c_byte), + ("value", typ)] + structures.append(X) + byteswapped_structures.append(Y) + +class TestStructures(unittest.TestCase): + def test_native(self): + for typ in structures: +## print typ.value + self.failUnlessEqual(typ.value.offset, 1) + o = typ() + o.value = 4 + self.failUnlessEqual(o.value, 4) + + def test_swapped(self): + for typ in byteswapped_structures: +## print >> sys.stderr, typ.value + self.failUnlessEqual(typ.value.offset, 1) + o = typ() + o.value = 4 + self.failUnlessEqual(o.value, 4) + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/distutils/command/build_ext.py b/Lib/distutils/command/build_ext.py index 6ea5d57..5771252 100644 --- a/Lib/distutils/command/build_ext.py +++ b/Lib/distutils/command/build_ext.py @@ -185,7 +185,9 @@ class build_ext (Command): # for extensions under Cygwin and AtheOS Python's library directory must be # appended to library_dirs - if sys.platform[:6] == 'cygwin' or sys.platform[:6] == 'atheos': + if sys.platform[:6] == 'cygwin' or sys.platform[:6] == 'atheos' or \ + (sys.platform.startswith('linux') and + sysconfig.get_config_var('Py_ENABLE_SHARED')): if string.find(sys.executable, sys.exec_prefix) != -1: # building third party extensions self.library_dirs.append(os.path.join(sys.prefix, "lib", @@ -688,6 +690,13 @@ class build_ext (Command): # extensions, it is a reference to the original list return ext.libraries + [pythonlib, "m"] + extra else: - return ext.libraries + from distutils import sysconfig + if sysconfig.get_config_var('Py_ENABLE_SHARED'): + template = "python%d.%d" + pythonlib = (template % + (sys.hexversion >> 24, (sys.hexversion >> 16) & 0xff)) + return ext.libraries + [pythonlib] + else: + return ext.libraries # class build_ext diff --git a/Lib/distutils/command/install.py b/Lib/distutils/command/install.py index 7723761..453151d 100644 --- a/Lib/distutils/command/install.py +++ b/Lib/distutils/command/install.py @@ -601,6 +601,7 @@ class install (Command): ('install_headers', has_headers), ('install_scripts', has_scripts), ('install_data', has_data), + ('install_egg_info', lambda self:True), ] # class install diff --git a/Lib/distutils/command/install_egg_info.py b/Lib/distutils/command/install_egg_info.py new file mode 100644 index 0000000..c31ac29 --- /dev/null +++ b/Lib/distutils/command/install_egg_info.py @@ -0,0 +1,75 @@ +"""distutils.command.install_egg_info + +Implements the Distutils 'install_egg_info' command, for installing +a package's PKG-INFO metadata.""" + + +from distutils.cmd import Command +from distutils import log, dir_util +import os, sys, re + +class install_egg_info(Command): + """Install an .egg-info file for the package""" + + description = "Install package's PKG-INFO metadata as an .egg-info file" + user_options = [ + ('install-dir=', 'd', "directory to install to"), + ] + + def initialize_options(self): + self.install_dir = None + + def finalize_options(self): + self.set_undefined_options('install_lib',('install_dir','install_dir')) + basename = "%s-%s-py%s.egg-info" % ( + to_filename(safe_name(self.distribution.get_name())), + to_filename(safe_version(self.distribution.get_version())), + sys.version[:3] + ) + self.target = os.path.join(self.install_dir, basename) + self.outputs = [self.target] + + def run(self): + target = self.target + if os.path.isdir(target) and not os.path.islink(target): + dir_util.remove_tree(target, dry_run=self.dry_run) + elif os.path.exists(target): + self.execute(os.unlink,(self.target,),"Removing "+target) + log.info("Writing %s", target) + if not self.dry_run: + f = open(target, 'w') + self.distribution.metadata.write_pkg_file(f) + f.close() + + def get_outputs(self): + return self.outputs + + +# The following routines are taken from setuptools' pkg_resources module and +# can be replaced by importing them from pkg_resources once it is included +# in the stdlib. + +def safe_name(name): + """Convert an arbitrary string to a standard distribution name + + Any runs of non-alphanumeric/. characters are replaced with a single '-'. + """ + return re.sub('[^A-Za-z0-9.]+', '-', name) + + +def safe_version(version): + """Convert an arbitrary string to a standard version string + + Spaces become dots, and all other non-alphanumeric characters become + dashes, with runs of multiple dashes condensed to a single dash. + """ + version = version.replace(' ','.') + return re.sub('[^A-Za-z0-9.]+', '-', version) + + +def to_filename(name): + """Convert a project or version name to its filename-escaped form + + Any '-' characters are currently replaced with '_'. + """ + return name.replace('-','_') diff --git a/Lib/distutils/command/upload.py b/Lib/distutils/command/upload.py index 62767a3..6f4ce81 100644 --- a/Lib/distutils/command/upload.py +++ b/Lib/distutils/command/upload.py @@ -29,6 +29,7 @@ class upload(Command): 'display full response text from server'), ('sign', 's', 'sign files to upload using gpg'), + ('identity=', 'i', 'GPG identity used to sign files'), ] boolean_options = ['show-response', 'sign'] @@ -38,8 +39,13 @@ class upload(Command): self.repository = '' self.show_response = 0 self.sign = False + self.identity = None def finalize_options(self): + if self.identity and not self.sign: + raise DistutilsOptionError( + "Must use --sign for --identity to have meaning" + ) if os.environ.has_key('HOME'): rc = os.path.join(os.environ['HOME'], '.pypirc') if os.path.exists(rc): @@ -67,7 +73,10 @@ class upload(Command): def upload_file(self, command, pyversion, filename): # Sign if requested if self.sign: - spawn(("gpg", "--detach-sign", "-a", filename), + gpg_args = ["gpg", "--detach-sign", "-a", filename] + if self.identity: + gpg_args[2:2] = ["--local-user", self.identity] + spawn(gpg_args, dry_run=self.dry_run) # Fill in the data - send all the meta-data in case we need to diff --git a/Lib/distutils/log.py b/Lib/distutils/log.py index cf3ee13..95d4c1c 100644 --- a/Lib/distutils/log.py +++ b/Lib/distutils/log.py @@ -20,7 +20,12 @@ class Log: def _log(self, level, msg, args): if level >= self.threshold: - print msg % args + if not args: + # msg may contain a '%'. If args is empty, + # don't even try to string-format + print msg + else: + print msg % args sys.stdout.flush() def log(self, level, msg, *args): diff --git a/Lib/distutils/sysconfig.py b/Lib/distutils/sysconfig.py index dc603be..49536f0 100644 --- a/Lib/distutils/sysconfig.py +++ b/Lib/distutils/sysconfig.py @@ -31,7 +31,7 @@ landmark = os.path.join(argv0_path, "Modules", "Setup") python_build = os.path.isfile(landmark) -del argv0_path, landmark +del landmark def get_python_version(): @@ -185,7 +185,7 @@ def customize_compiler(compiler): def get_config_h_filename(): """Return full pathname of installed pyconfig.h file.""" if python_build: - inc_dir = os.curdir + inc_dir = argv0_path else: inc_dir = get_python_inc(plat_specific=1) if get_python_version() < '2.2': @@ -213,8 +213,8 @@ def parse_config_h(fp, g=None): """ if g is None: g = {} - define_rx = re.compile("#define ([A-Z][A-Z0-9_]+) (.*)\n") - undef_rx = re.compile("/[*] #undef ([A-Z][A-Z0-9_]+) [*]/\n") + define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n") + undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n") # while 1: line = fp.readline() @@ -351,6 +351,17 @@ def _init_posix(): raise DistutilsPlatformError(my_msg) + # load the installed pyconfig.h: + try: + filename = get_config_h_filename() + parse_config_h(file(filename), g) + except IOError, msg: + my_msg = "invalid Python installation: unable to open %s" % filename + if hasattr(msg, "strerror"): + my_msg = my_msg + " (%s)" % msg.strerror + + raise DistutilsPlatformError(my_msg) + # On MacOSX we need to check the setting of the environment variable # MACOSX_DEPLOYMENT_TARGET: configure bases some choices on it so # it needs to be compatible. @@ -361,7 +372,7 @@ def _init_posix(): if cur_target == '': cur_target = cfg_target os.putenv('MACOSX_DEPLOYMENT_TARGET', cfg_target) - if cfg_target != cur_target: + elif map(int, cfg_target.split('.')) > map(int, cur_target.split('.')): my_msg = ('$MACOSX_DEPLOYMENT_TARGET mismatch: now "%s" but "%s" during configure' % (cur_target, cfg_target)) raise DistutilsPlatformError(my_msg) diff --git a/Lib/doctest.py b/Lib/doctest.py index 6244fae..70c355a 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -236,6 +236,15 @@ def _normalize_module(module, depth=2): else: raise TypeError("Expected a module, string, or None") +def _load_testfile(filename, package, module_relative): + if module_relative: + package = _normalize_module(package, 3) + filename = _module_relative_path(package, filename) + if hasattr(package, '__loader__'): + if hasattr(package.__loader__, 'get_data'): + return package.__loader__.get_data(filename), filename + return open(filename).read(), filename + def _indent(s, indent=4): """ Add the given number of space characters to the beginning every @@ -1319,13 +1328,13 @@ class DocTestRunner: __LINECACHE_FILENAME_RE = re.compile(r'<doctest ' r'(?P<name>[\w\.]+)' r'\[(?P<examplenum>\d+)\]>$') - def __patched_linecache_getlines(self, filename): + def __patched_linecache_getlines(self, filename, module_globals=None): m = self.__LINECACHE_FILENAME_RE.match(filename) if m and m.group('name') == self.test.name: example = self.test.examples[int(m.group('examplenum'))] return example.source.splitlines(True) else: - return self.save_linecache_getlines(filename) + return self.save_linecache_getlines(filename, module_globals) def run(self, test, compileflags=None, out=None, clear_globs=True): """ @@ -1933,9 +1942,7 @@ def testfile(filename, module_relative=True, name=None, package=None, "relative paths.") # Relativize the path - if module_relative: - package = _normalize_module(package) - filename = _module_relative_path(package, filename) + text, filename = _load_testfile(filename, package, module_relative) # If no name was given, then use the file's name. if name is None: @@ -1955,8 +1962,7 @@ def testfile(filename, module_relative=True, name=None, package=None, runner = DocTestRunner(verbose=verbose, optionflags=optionflags) # Read the file, convert it to a test, and run it. - s = open(filename).read() - test = parser.get_doctest(s, globs, name, filename, 0) + test = parser.get_doctest(text, globs, name, filename, 0) runner.run(test) if report: @@ -2336,15 +2342,13 @@ def DocFileTest(path, module_relative=True, package=None, "relative paths.") # Relativize the path. - if module_relative: - package = _normalize_module(package) - path = _module_relative_path(package, path) + doc, path = _load_testfile(path, package, module_relative) + if "__file__" not in globs: globs["__file__"] = path # Find the file and read it. name = os.path.basename(path) - doc = open(path).read() # Convert it to a test, and wrap it in a DocFileCase. test = parser.get_doctest(doc, globs, name, path, 0) diff --git a/Lib/dummy_thread.py b/Lib/dummy_thread.py index fb3abbf..d69d840 100644 --- a/Lib/dummy_thread.py +++ b/Lib/dummy_thread.py @@ -113,6 +113,14 @@ class LockType(object): self.locked_status = True return True + __enter__ = acquire + + def __exit__(self, typ, val, tb): + self.release() + + def __context__(self): + return self + def release(self): """Release the dummy lock.""" # XXX Perhaps shouldn't actually bother to test? Could lead diff --git a/Lib/easy_install.py b/Lib/easy_install.py new file mode 100644 index 0000000..d87e984 --- /dev/null +++ b/Lib/easy_install.py @@ -0,0 +1,5 @@ +"""Run the EasyInstall command""" + +if __name__ == '__main__': + from setuptools.command.easy_install import main + main() diff --git a/Lib/email/Charset.py b/Lib/email/Charset.py deleted file mode 100644 index fd4043b..0000000 --- a/Lib/email/Charset.py +++ /dev/null @@ -1,370 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Ben Gertzfield, Barry Warsaw -# Contact: email-sig@python.org - -import email.base64MIME -import email.quopriMIME -from email.Encoders import encode_7or8bit - - - -# Flags for types of header encodings -QP = 1 # Quoted-Printable -BASE64 = 2 # Base64 -SHORTEST = 3 # the shorter of QP and base64, but only for headers - -# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7 -MISC_LEN = 7 - -DEFAULT_CHARSET = 'us-ascii' - - - -# Defaults -CHARSETS = { - # input header enc body enc output conv - 'iso-8859-1': (QP, QP, None), - 'iso-8859-2': (QP, QP, None), - 'iso-8859-3': (QP, QP, None), - 'iso-8859-4': (QP, QP, None), - # iso-8859-5 is Cyrillic, and not especially used - # iso-8859-6 is Arabic, also not particularly used - # iso-8859-7 is Greek, QP will not make it readable - # iso-8859-8 is Hebrew, QP will not make it readable - 'iso-8859-9': (QP, QP, None), - 'iso-8859-10': (QP, QP, None), - # iso-8859-11 is Thai, QP will not make it readable - 'iso-8859-13': (QP, QP, None), - 'iso-8859-14': (QP, QP, None), - 'iso-8859-15': (QP, QP, None), - 'windows-1252':(QP, QP, None), - 'viscii': (QP, QP, None), - 'us-ascii': (None, None, None), - 'big5': (BASE64, BASE64, None), - 'gb2312': (BASE64, BASE64, None), - 'euc-jp': (BASE64, None, 'iso-2022-jp'), - 'shift_jis': (BASE64, None, 'iso-2022-jp'), - 'iso-2022-jp': (BASE64, None, None), - 'koi8-r': (BASE64, BASE64, None), - 'utf-8': (SHORTEST, BASE64, 'utf-8'), - # We're making this one up to represent raw unencoded 8-bit - '8bit': (None, BASE64, 'utf-8'), - } - -# Aliases for other commonly-used names for character sets. Map -# them to the real ones used in email. -ALIASES = { - 'latin_1': 'iso-8859-1', - 'latin-1': 'iso-8859-1', - 'latin_2': 'iso-8859-2', - 'latin-2': 'iso-8859-2', - 'latin_3': 'iso-8859-3', - 'latin-3': 'iso-8859-3', - 'latin_4': 'iso-8859-4', - 'latin-4': 'iso-8859-4', - 'latin_5': 'iso-8859-9', - 'latin-5': 'iso-8859-9', - 'latin_6': 'iso-8859-10', - 'latin-6': 'iso-8859-10', - 'latin_7': 'iso-8859-13', - 'latin-7': 'iso-8859-13', - 'latin_8': 'iso-8859-14', - 'latin-8': 'iso-8859-14', - 'latin_9': 'iso-8859-15', - 'latin-9': 'iso-8859-15', - 'cp949': 'ks_c_5601-1987', - 'euc_jp': 'euc-jp', - 'euc_kr': 'euc-kr', - 'ascii': 'us-ascii', - } - - -# Map charsets to their Unicode codec strings. -CODEC_MAP = { - 'gb2312': 'eucgb2312_cn', - 'big5': 'big5_tw', - # Hack: We don't want *any* conversion for stuff marked us-ascii, as all - # sorts of garbage might be sent to us in the guise of 7-bit us-ascii. - # Let that stuff pass through without conversion to/from Unicode. - 'us-ascii': None, - } - - - -# Convenience functions for extending the above mappings -def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): - """Add character set properties to the global registry. - - charset is the input character set, and must be the canonical name of a - character set. - - Optional header_enc and body_enc is either Charset.QP for - quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for - the shortest of qp or base64 encoding, or None for no encoding. SHORTEST - is only valid for header_enc. It describes how message headers and - message bodies in the input charset are to be encoded. Default is no - encoding. - - Optional output_charset is the character set that the output should be - in. Conversions will proceed from input charset, to Unicode, to the - output charset when the method Charset.convert() is called. The default - is to output in the same character set as the input. - - Both input_charset and output_charset must have Unicode codec entries in - the module's charset-to-codec mapping; use add_codec(charset, codecname) - to add codecs the module does not know about. See the codecs module's - documentation for more information. - """ - if body_enc == SHORTEST: - raise ValueError('SHORTEST not allowed for body_enc') - CHARSETS[charset] = (header_enc, body_enc, output_charset) - - -def add_alias(alias, canonical): - """Add a character set alias. - - alias is the alias name, e.g. latin-1 - canonical is the character set's canonical name, e.g. iso-8859-1 - """ - ALIASES[alias] = canonical - - -def add_codec(charset, codecname): - """Add a codec that map characters in the given charset to/from Unicode. - - charset is the canonical name of a character set. codecname is the name - of a Python codec, as appropriate for the second argument to the unicode() - built-in, or to the encode() method of a Unicode string. - """ - CODEC_MAP[charset] = codecname - - - -class Charset: - """Map character sets to their email properties. - - This class provides information about the requirements imposed on email - for a specific character set. It also provides convenience routines for - converting between character sets, given the availability of the - applicable codecs. Given a character set, it will do its best to provide - information on how to use that character set in an email in an - RFC-compliant way. - - Certain character sets must be encoded with quoted-printable or base64 - when used in email headers or bodies. Certain character sets must be - converted outright, and are not allowed in email. Instances of this - module expose the following information about a character set: - - input_charset: The initial character set specified. Common aliases - are converted to their `official' email names (e.g. latin_1 - is converted to iso-8859-1). Defaults to 7-bit us-ascii. - - header_encoding: If the character set must be encoded before it can be - used in an email header, this attribute will be set to - Charset.QP (for quoted-printable), Charset.BASE64 (for - base64 encoding), or Charset.SHORTEST for the shortest of - QP or BASE64 encoding. Otherwise, it will be None. - - body_encoding: Same as header_encoding, but describes the encoding for the - mail message's body, which indeed may be different than the - header encoding. Charset.SHORTEST is not allowed for - body_encoding. - - output_charset: Some character sets must be converted before the can be - used in email headers or bodies. If the input_charset is - one of them, this attribute will contain the name of the - charset output will be converted to. Otherwise, it will - be None. - - input_codec: The name of the Python codec used to convert the - input_charset to Unicode. If no conversion codec is - necessary, this attribute will be None. - - output_codec: The name of the Python codec used to convert Unicode - to the output_charset. If no conversion codec is necessary, - this attribute will have the same value as the input_codec. - """ - def __init__(self, input_charset=DEFAULT_CHARSET): - # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to - # unicode because its .lower() is locale insensitive. - input_charset = unicode(input_charset, 'ascii').lower() - # Set the input charset after filtering through the aliases - self.input_charset = ALIASES.get(input_charset, input_charset) - # We can try to guess which encoding and conversion to use by the - # charset_map dictionary. Try that first, but let the user override - # it. - henc, benc, conv = CHARSETS.get(self.input_charset, - (SHORTEST, BASE64, None)) - if not conv: - conv = self.input_charset - # Set the attributes, allowing the arguments to override the default. - self.header_encoding = henc - self.body_encoding = benc - self.output_charset = ALIASES.get(conv, conv) - # Now set the codecs. If one isn't defined for input_charset, - # guess and try a Unicode codec with the same name as input_codec. - self.input_codec = CODEC_MAP.get(self.input_charset, - self.input_charset) - self.output_codec = CODEC_MAP.get(self.output_charset, - self.output_charset) - - def __str__(self): - return self.input_charset.lower() - - __repr__ = __str__ - - def __eq__(self, other): - return str(self) == str(other).lower() - - def __ne__(self, other): - return not self.__eq__(other) - - def get_body_encoding(self): - """Return the content-transfer-encoding used for body encoding. - - This is either the string `quoted-printable' or `base64' depending on - the encoding used, or it is a function in which case you should call - the function with a single argument, the Message object being - encoded. The function should then set the Content-Transfer-Encoding - header itself to whatever is appropriate. - - Returns "quoted-printable" if self.body_encoding is QP. - Returns "base64" if self.body_encoding is BASE64. - Returns "7bit" otherwise. - """ - assert self.body_encoding <> SHORTEST - if self.body_encoding == QP: - return 'quoted-printable' - elif self.body_encoding == BASE64: - return 'base64' - else: - return encode_7or8bit - - def convert(self, s): - """Convert a string from the input_codec to the output_codec.""" - if self.input_codec <> self.output_codec: - return unicode(s, self.input_codec).encode(self.output_codec) - else: - return s - - def to_splittable(self, s): - """Convert a possibly multibyte string to a safely splittable format. - - Uses the input_codec to try and convert the string to Unicode, so it - can be safely split on character boundaries (even for multibyte - characters). - - Returns the string as-is if it isn't known how to convert it to - Unicode with the input_charset. - - Characters that could not be converted to Unicode will be replaced - with the Unicode replacement character U+FFFD. - """ - if isinstance(s, unicode) or self.input_codec is None: - return s - try: - return unicode(s, self.input_codec, 'replace') - except LookupError: - # Input codec not installed on system, so return the original - # string unchanged. - return s - - def from_splittable(self, ustr, to_output=True): - """Convert a splittable string back into an encoded string. - - Uses the proper codec to try and convert the string from Unicode back - into an encoded format. Return the string as-is if it is not Unicode, - or if it could not be converted from Unicode. - - Characters that could not be converted from Unicode will be replaced - with an appropriate character (usually '?'). - - If to_output is True (the default), uses output_codec to convert to an - encoded format. If to_output is False, uses input_codec. - """ - if to_output: - codec = self.output_codec - else: - codec = self.input_codec - if not isinstance(ustr, unicode) or codec is None: - return ustr - try: - return ustr.encode(codec, 'replace') - except LookupError: - # Output codec not installed - return ustr - - def get_output_charset(self): - """Return the output character set. - - This is self.output_charset if that is not None, otherwise it is - self.input_charset. - """ - return self.output_charset or self.input_charset - - def encoded_header_len(self, s): - """Return the length of the encoded header string.""" - cset = self.get_output_charset() - # The len(s) of a 7bit encoding is len(s) - if self.header_encoding == BASE64: - return email.base64MIME.base64_len(s) + len(cset) + MISC_LEN - elif self.header_encoding == QP: - return email.quopriMIME.header_quopri_len(s) + len(cset) + MISC_LEN - elif self.header_encoding == SHORTEST: - lenb64 = email.base64MIME.base64_len(s) - lenqp = email.quopriMIME.header_quopri_len(s) - return min(lenb64, lenqp) + len(cset) + MISC_LEN - else: - return len(s) - - def header_encode(self, s, convert=False): - """Header-encode a string, optionally converting it to output_charset. - - If convert is True, the string will be converted from the input - charset to the output charset automatically. This is not useful for - multibyte character sets, which have line length issues (multibyte - characters must be split on a character, not a byte boundary); use the - high-level Header class to deal with these issues. convert defaults - to False. - - The type of encoding (base64 or quoted-printable) will be based on - self.header_encoding. - """ - cset = self.get_output_charset() - if convert: - s = self.convert(s) - # 7bit/8bit encodings return the string unchanged (modulo conversions) - if self.header_encoding == BASE64: - return email.base64MIME.header_encode(s, cset) - elif self.header_encoding == QP: - return email.quopriMIME.header_encode(s, cset, maxlinelen=None) - elif self.header_encoding == SHORTEST: - lenb64 = email.base64MIME.base64_len(s) - lenqp = email.quopriMIME.header_quopri_len(s) - if lenb64 < lenqp: - return email.base64MIME.header_encode(s, cset) - else: - return email.quopriMIME.header_encode(s, cset, maxlinelen=None) - else: - return s - - def body_encode(self, s, convert=True): - """Body-encode a string and convert it to output_charset. - - If convert is True (the default), the string will be converted from - the input charset to output charset automatically. Unlike - header_encode(), there are no issues with byte boundaries and - multibyte charsets in email bodies, so this is usually pretty safe. - - The type of encoding (base64 or quoted-printable) will be based on - self.body_encoding. - """ - if convert: - s = self.convert(s) - # 7bit/8bit encodings return the string unchanged (module conversions) - if self.body_encoding is BASE64: - return email.base64MIME.body_encode(s) - elif self.body_encoding is QP: - return email.quopriMIME.body_encode(s) - else: - return s diff --git a/Lib/email/Encoders.py b/Lib/email/Encoders.py deleted file mode 100644 index baac2a3..0000000 --- a/Lib/email/Encoders.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (C) 2001-2004 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Encodings and related functions.""" - -import base64 -from quopri import encodestring as _encodestring - -def _qencode(s): - enc = _encodestring(s, quotetabs=True) - # Must encode spaces, which quopri.encodestring() doesn't do - return enc.replace(' ', '=20') - - -def _bencode(s): - # We can't quite use base64.encodestring() since it tacks on a "courtesy - # newline". Blech! - if not s: - return s - hasnewline = (s[-1] == '\n') - value = base64.encodestring(s) - if not hasnewline and value[-1] == '\n': - return value[:-1] - return value - - - -def encode_base64(msg): - """Encode the message's payload in Base64. - - Also, add an appropriate Content-Transfer-Encoding header. - """ - orig = msg.get_payload() - encdata = _bencode(orig) - msg.set_payload(encdata) - msg['Content-Transfer-Encoding'] = 'base64' - - - -def encode_quopri(msg): - """Encode the message's payload in quoted-printable. - - Also, add an appropriate Content-Transfer-Encoding header. - """ - orig = msg.get_payload() - encdata = _qencode(orig) - msg.set_payload(encdata) - msg['Content-Transfer-Encoding'] = 'quoted-printable' - - - -def encode_7or8bit(msg): - """Set the Content-Transfer-Encoding header to 7bit or 8bit.""" - orig = msg.get_payload() - if orig is None: - # There's no payload. For backwards compatibility we use 7bit - msg['Content-Transfer-Encoding'] = '7bit' - return - # We play a trick to make this go fast. If encoding to ASCII succeeds, we - # know the data must be 7bit, otherwise treat it as 8bit. - try: - orig.encode('ascii') - except UnicodeError: - # iso-2022-* is non-ASCII but still 7-bit - charset = msg.get_charset() - output_cset = charset and charset.output_charset - if output_cset and output_cset.lower().startswith('iso-2202-'): - msg['Content-Transfer-Encoding'] = '7bit' - else: - msg['Content-Transfer-Encoding'] = '8bit' - else: - msg['Content-Transfer-Encoding'] = '7bit' - - - -def encode_noop(msg): - """Do nothing.""" diff --git a/Lib/email/Errors.py b/Lib/email/Errors.py deleted file mode 100644 index e13a2c7..0000000 --- a/Lib/email/Errors.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (C) 2001-2004 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""email package exception classes.""" - - - -class MessageError(Exception): - """Base class for errors in the email package.""" - - -class MessageParseError(MessageError): - """Base class for message parsing errors.""" - - -class HeaderParseError(MessageParseError): - """Error while parsing headers.""" - - -class BoundaryError(MessageParseError): - """Couldn't find terminating boundary.""" - - -class MultipartConversionError(MessageError, TypeError): - """Conversion to a multipart is prohibited.""" - - - -# These are parsing defects which the parser was able to work around. -class MessageDefect: - """Base class for a message defect.""" - - def __init__(self, line=None): - self.line = line - -class NoBoundaryInMultipartDefect(MessageDefect): - """A message claimed to be a multipart but had no boundary parameter.""" - -class StartBoundaryNotFoundDefect(MessageDefect): - """The claimed start boundary was never found.""" - -class FirstHeaderLineIsContinuationDefect(MessageDefect): - """A message had a continuation line as its first header line.""" - -class MisplacedEnvelopeHeaderDefect(MessageDefect): - """A 'Unix-from' header was found in the middle of a header block.""" - -class MalformedHeaderDefect(MessageDefect): - """Found a header that was missing a colon, or was otherwise malformed.""" - -class MultipartInvariantViolationDefect(MessageDefect): - """A message claimed to be a multipart but no subparts were found.""" diff --git a/Lib/email/FeedParser.py b/Lib/email/FeedParser.py deleted file mode 100644 index a2130e2..0000000 --- a/Lib/email/FeedParser.py +++ /dev/null @@ -1,477 +0,0 @@ -# Copyright (C) 2004-2006 Python Software Foundation -# Authors: Baxter, Wouters and Warsaw -# Contact: email-sig@python.org - -"""FeedParser - An email feed parser. - -The feed parser implements an interface for incrementally parsing an email -message, line by line. This has advantages for certain applications, such as -those reading email messages off a socket. - -FeedParser.feed() is the primary interface for pushing new data into the -parser. It returns when there's nothing more it can do with the available -data. When you have no more data to push into the parser, call .close(). -This completes the parsing and returns the root message object. - -The other advantage of this parser is that it will never throw a parsing -exception. Instead, when it finds something unexpected, it adds a 'defect' to -the current message. Defects are just instances that live on the message -object's .defects attribute. -""" - -import re -from email import Errors -from email import Message - -NLCRE = re.compile('\r\n|\r|\n') -NLCRE_bol = re.compile('(\r\n|\r|\n)') -NLCRE_eol = re.compile('(\r\n|\r|\n)$') -NLCRE_crack = re.compile('(\r\n|\r|\n)') -# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character -# except controls, SP, and ":". -headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])') -EMPTYSTRING = '' -NL = '\n' - -NeedMoreData = object() - - - -class BufferedSubFile(object): - """A file-ish object that can have new data loaded into it. - - You can also push and pop line-matching predicates onto a stack. When the - current predicate matches the current line, a false EOF response - (i.e. empty string) is returned instead. This lets the parser adhere to a - simple abstraction -- it parses until EOF closes the current message. - """ - def __init__(self): - # The last partial line pushed into this object. - self._partial = '' - # The list of full, pushed lines, in reverse order - self._lines = [] - # The stack of false-EOF checking predicates. - self._eofstack = [] - # A flag indicating whether the file has been closed or not. - self._closed = False - - def push_eof_matcher(self, pred): - self._eofstack.append(pred) - - def pop_eof_matcher(self): - return self._eofstack.pop() - - def close(self): - # Don't forget any trailing partial line. - self._lines.append(self._partial) - self._partial = '' - self._closed = True - - def readline(self): - if not self._lines: - if self._closed: - return '' - return NeedMoreData - # Pop the line off the stack and see if it matches the current - # false-EOF predicate. - line = self._lines.pop() - # RFC 2046, section 5.1.2 requires us to recognize outer level - # boundaries at any level of inner nesting. Do this, but be sure it's - # in the order of most to least nested. - for ateof in self._eofstack[::-1]: - if ateof(line): - # We're at the false EOF. But push the last line back first. - self._lines.append(line) - return '' - return line - - def unreadline(self, line): - # Let the consumer push a line back into the buffer. - assert line is not NeedMoreData - self._lines.append(line) - - def push(self, data): - """Push some new data into this object.""" - # Handle any previous leftovers - data, self._partial = self._partial + data, '' - # Crack into lines, but preserve the newlines on the end of each - parts = NLCRE_crack.split(data) - # The *ahem* interesting behaviour of re.split when supplied grouping - # parentheses is that the last element of the resulting list is the - # data after the final RE. In the case of a NL/CR terminated string, - # this is the empty string. - self._partial = parts.pop() - # parts is a list of strings, alternating between the line contents - # and the eol character(s). Gather up a list of lines after - # re-attaching the newlines. - lines = [] - for i in range(len(parts) // 2): - lines.append(parts[i*2] + parts[i*2+1]) - self.pushlines(lines) - - def pushlines(self, lines): - # Reverse and insert at the front of the lines. - self._lines[:0] = lines[::-1] - - def is_closed(self): - return self._closed - - def __iter__(self): - return self - - def next(self): - line = self.readline() - if line == '': - raise StopIteration - return line - - - -class FeedParser: - """A feed-style parser of email.""" - - def __init__(self, _factory=Message.Message): - """_factory is called with no arguments to create a new message obj""" - self._factory = _factory - self._input = BufferedSubFile() - self._msgstack = [] - self._parse = self._parsegen().next - self._cur = None - self._last = None - self._headersonly = False - - # Non-public interface for supporting Parser's headersonly flag - def _set_headersonly(self): - self._headersonly = True - - def feed(self, data): - """Push more data into the parser.""" - self._input.push(data) - self._call_parse() - - def _call_parse(self): - try: - self._parse() - except StopIteration: - pass - - def close(self): - """Parse all remaining data and return the root message object.""" - self._input.close() - self._call_parse() - root = self._pop_message() - assert not self._msgstack - # Look for final set of defects - if root.get_content_maintype() == 'multipart' \ - and not root.is_multipart(): - root.defects.append(Errors.MultipartInvariantViolationDefect()) - return root - - def _new_message(self): - msg = self._factory() - if self._cur and self._cur.get_content_type() == 'multipart/digest': - msg.set_default_type('message/rfc822') - if self._msgstack: - self._msgstack[-1].attach(msg) - self._msgstack.append(msg) - self._cur = msg - self._last = msg - - def _pop_message(self): - retval = self._msgstack.pop() - if self._msgstack: - self._cur = self._msgstack[-1] - else: - self._cur = None - return retval - - def _parsegen(self): - # Create a new message and start by parsing headers. - self._new_message() - headers = [] - # Collect the headers, searching for a line that doesn't match the RFC - # 2822 header or continuation pattern (including an empty line). - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - if not headerRE.match(line): - # If we saw the RFC defined header/body separator - # (i.e. newline), just throw it away. Otherwise the line is - # part of the body so push it back. - if not NLCRE.match(line): - self._input.unreadline(line) - break - headers.append(line) - # Done with the headers, so parse them and figure out what we're - # supposed to see in the body of the message. - self._parse_headers(headers) - # Headers-only parsing is a backwards compatibility hack, which was - # necessary in the older parser, which could throw errors. All - # remaining lines in the input are thrown into the message body. - if self._headersonly: - lines = [] - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - if line == '': - break - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - return - if self._cur.get_content_type() == 'message/delivery-status': - # message/delivery-status contains blocks of headers separated by - # a blank line. We'll represent each header block as a separate - # nested message object, but the processing is a bit different - # than standard message/* types because there is no body for the - # nested messages. A blank line separates the subparts. - while True: - self._input.push_eof_matcher(NLCRE.match) - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - msg = self._pop_message() - # We need to pop the EOF matcher in order to tell if we're at - # the end of the current file, not the end of the last block - # of message headers. - self._input.pop_eof_matcher() - # The input stream must be sitting at the newline or at the - # EOF. We want to see if we're at the end of this subpart, so - # first consume the blank line, then test the next line to see - # if we're at this subpart's EOF. - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - break - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - break - if line == '': - break - # Not at EOF so this is a line we're going to need. - self._input.unreadline(line) - return - if self._cur.get_content_maintype() == 'message': - # The message claims to be a message/* type, then what follows is - # another RFC 2822 message. - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - self._pop_message() - return - if self._cur.get_content_maintype() == 'multipart': - boundary = self._cur.get_boundary() - if boundary is None: - # The message /claims/ to be a multipart but it has not - # defined a boundary. That's a problem which we'll handle by - # reading everything until the EOF and marking the message as - # defective. - self._cur.defects.append(Errors.NoBoundaryInMultipartDefect()) - lines = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - return - # Create a line match predicate which matches the inter-part - # boundary as well as the end-of-multipart boundary. Don't push - # this onto the input stream until we've scanned past the - # preamble. - separator = '--' + boundary - boundaryre = re.compile( - '(?P<sep>' + re.escape(separator) + - r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$') - capturing_preamble = True - preamble = [] - linesep = False - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - if line == '': - break - mo = boundaryre.match(line) - if mo: - # If we're looking at the end boundary, we're done with - # this multipart. If there was a newline at the end of - # the closing boundary, then we need to initialize the - # epilogue with the empty string (see below). - if mo.group('end'): - linesep = mo.group('linesep') - break - # We saw an inter-part boundary. Were we in the preamble? - if capturing_preamble: - if preamble: - # According to RFC 2046, the last newline belongs - # to the boundary. - lastline = preamble[-1] - eolmo = NLCRE_eol.search(lastline) - if eolmo: - preamble[-1] = lastline[:-len(eolmo.group(0))] - self._cur.preamble = EMPTYSTRING.join(preamble) - capturing_preamble = False - self._input.unreadline(line) - continue - # We saw a boundary separating two parts. Consume any - # multiple boundary lines that may be following. Our - # interpretation of RFC 2046 BNF grammar does not produce - # body parts within such double boundaries. - while True: - line = self._input.readline() - if line is NeedMoreData: - yield NeedMoreData - continue - mo = boundaryre.match(line) - if not mo: - self._input.unreadline(line) - break - # Recurse to parse this subpart; the input stream points - # at the subpart's first line. - self._input.push_eof_matcher(boundaryre.match) - for retval in self._parsegen(): - if retval is NeedMoreData: - yield NeedMoreData - continue - break - # Because of RFC 2046, the newline preceding the boundary - # separator actually belongs to the boundary, not the - # previous subpart's payload (or epilogue if the previous - # part is a multipart). - if self._last.get_content_maintype() == 'multipart': - epilogue = self._last.epilogue - if epilogue == '': - self._last.epilogue = None - elif epilogue is not None: - mo = NLCRE_eol.search(epilogue) - if mo: - end = len(mo.group(0)) - self._last.epilogue = epilogue[:-end] - else: - payload = self._last.get_payload() - if isinstance(payload, basestring): - mo = NLCRE_eol.search(payload) - if mo: - payload = payload[:-len(mo.group(0))] - self._last.set_payload(payload) - self._input.pop_eof_matcher() - self._pop_message() - # Set the multipart up for newline cleansing, which will - # happen if we're in a nested multipart. - self._last = self._cur - else: - # I think we must be in the preamble - assert capturing_preamble - preamble.append(line) - # We've seen either the EOF or the end boundary. If we're still - # capturing the preamble, we never saw the start boundary. Note - # that as a defect and store the captured text as the payload. - # Everything from here to the EOF is epilogue. - if capturing_preamble: - self._cur.defects.append(Errors.StartBoundaryNotFoundDefect()) - self._cur.set_payload(EMPTYSTRING.join(preamble)) - epilogue = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - self._cur.epilogue = EMPTYSTRING.join(epilogue) - return - # If the end boundary ended in a newline, we'll need to make sure - # the epilogue isn't None - if linesep: - epilogue = [''] - else: - epilogue = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - epilogue.append(line) - # Any CRLF at the front of the epilogue is not technically part of - # the epilogue. Also, watch out for an empty string epilogue, - # which means a single newline. - if epilogue: - firstline = epilogue[0] - bolmo = NLCRE_bol.match(firstline) - if bolmo: - epilogue[0] = firstline[len(bolmo.group(0)):] - self._cur.epilogue = EMPTYSTRING.join(epilogue) - return - # Otherwise, it's some non-multipart type, so the entire rest of the - # file contents becomes the payload. - lines = [] - for line in self._input: - if line is NeedMoreData: - yield NeedMoreData - continue - lines.append(line) - self._cur.set_payload(EMPTYSTRING.join(lines)) - - def _parse_headers(self, lines): - # Passed a list of lines that make up the headers for the current msg - lastheader = '' - lastvalue = [] - for lineno, line in enumerate(lines): - # Check for continuation - if line[0] in ' \t': - if not lastheader: - # The first line of the headers was a continuation. This - # is illegal, so let's note the defect, store the illegal - # line, and ignore it for purposes of headers. - defect = Errors.FirstHeaderLineIsContinuationDefect(line) - self._cur.defects.append(defect) - continue - lastvalue.append(line) - continue - if lastheader: - # XXX reconsider the joining of folded lines - lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n') - self._cur[lastheader] = lhdr - lastheader, lastvalue = '', [] - # Check for envelope header, i.e. unix-from - if line.startswith('From '): - if lineno == 0: - # Strip off the trailing newline - mo = NLCRE_eol.search(line) - if mo: - line = line[:-len(mo.group(0))] - self._cur.set_unixfrom(line) - continue - elif lineno == len(lines) - 1: - # Something looking like a unix-from at the end - it's - # probably the first line of the body, so push back the - # line and stop. - self._input.unreadline(line) - return - else: - # Weirdly placed unix-from line. Note this as a defect - # and ignore it. - defect = Errors.MisplacedEnvelopeHeaderDefect(line) - self._cur.defects.append(defect) - continue - # Split the line on the colon separating field name from value. - i = line.find(':') - if i < 0: - defect = Errors.MalformedHeaderDefect(line) - self._cur.defects.append(defect) - continue - lastheader = line[:i] - lastvalue = [line[i+1:].lstrip()] - # Done with all the lines, so handle the last header. - if lastheader: - # XXX reconsider the joining of folded lines - self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n') diff --git a/Lib/email/Generator.py b/Lib/email/Generator.py deleted file mode 100644 index 7969916..0000000 --- a/Lib/email/Generator.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright (C) 2001-2006 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Classes to generate plain text from a message object tree.""" - -import re -import sys -import time -import random -import warnings -from cStringIO import StringIO - -from email.Header import Header - -UNDERSCORE = '_' -NL = '\n' - -fcre = re.compile(r'^From ', re.MULTILINE) - -def _is8bitstring(s): - if isinstance(s, str): - try: - unicode(s, 'us-ascii') - except UnicodeError: - return True - return False - - - -class Generator: - """Generates output from a Message object tree. - - This basic generator writes the message to the given file object as plain - text. - """ - # - # Public interface - # - - def __init__(self, outfp, mangle_from_=True, maxheaderlen=78): - """Create the generator for message flattening. - - outfp is the output file-like object for writing the message to. It - must have a write() method. - - Optional mangle_from_ is a flag that, when True (the default), escapes - From_ lines in the body of the message by putting a `>' in front of - them. - - Optional maxheaderlen specifies the longest length for a non-continued - header. When a header line is longer (in characters, with tabs - expanded to 8 spaces) than maxheaderlen, the header will split as - defined in the Header class. Set maxheaderlen to zero to disable - header wrapping. The default is 78, as recommended (but not required) - by RFC 2822. - """ - self._fp = outfp - self._mangle_from_ = mangle_from_ - self._maxheaderlen = maxheaderlen - - def write(self, s): - # Just delegate to the file object - self._fp.write(s) - - def flatten(self, msg, unixfrom=False): - """Print the message object tree rooted at msg to the output file - specified when the Generator instance was created. - - unixfrom is a flag that forces the printing of a Unix From_ delimiter - before the first object in the message tree. If the original message - has no From_ delimiter, a `standard' one is crafted. By default, this - is False to inhibit the printing of any From_ delimiter. - - Note that for subobjects, no From_ line is printed. - """ - if unixfrom: - ufrom = msg.get_unixfrom() - if not ufrom: - ufrom = 'From nobody ' + time.ctime(time.time()) - print >> self._fp, ufrom - self._write(msg) - - # For backwards compatibility, but this is slower - def __call__(self, msg, unixfrom=False): - warnings.warn('__call__() deprecated; use flatten()', - DeprecationWarning, 2) - self.flatten(msg, unixfrom) - - def clone(self, fp): - """Clone this generator with the exact same options.""" - return self.__class__(fp, self._mangle_from_, self._maxheaderlen) - - # - # Protected interface - undocumented ;/ - # - - def _write(self, msg): - # We can't write the headers yet because of the following scenario: - # say a multipart message includes the boundary string somewhere in - # its body. We'd have to calculate the new boundary /before/ we write - # the headers so that we can write the correct Content-Type: - # parameter. - # - # The way we do this, so as to make the _handle_*() methods simpler, - # is to cache any subpart writes into a StringIO. The we write the - # headers and the StringIO contents. That way, subpart handlers can - # Do The Right Thing, and can still modify the Content-Type: header if - # necessary. - oldfp = self._fp - try: - self._fp = sfp = StringIO() - self._dispatch(msg) - finally: - self._fp = oldfp - # Write the headers. First we see if the message object wants to - # handle that itself. If not, we'll do it generically. - meth = getattr(msg, '_write_headers', None) - if meth is None: - self._write_headers(msg) - else: - meth(self) - self._fp.write(sfp.getvalue()) - - def _dispatch(self, msg): - # Get the Content-Type: for the message, then try to dispatch to - # self._handle_<maintype>_<subtype>(). If there's no handler for the - # full MIME type, then dispatch to self._handle_<maintype>(). If - # that's missing too, then dispatch to self._writeBody(). - main = msg.get_content_maintype() - sub = msg.get_content_subtype() - specific = UNDERSCORE.join((main, sub)).replace('-', '_') - meth = getattr(self, '_handle_' + specific, None) - if meth is None: - generic = main.replace('-', '_') - meth = getattr(self, '_handle_' + generic, None) - if meth is None: - meth = self._writeBody - meth(msg) - - # - # Default handlers - # - - def _write_headers(self, msg): - for h, v in msg.items(): - print >> self._fp, '%s:' % h, - if self._maxheaderlen == 0: - # Explicit no-wrapping - print >> self._fp, v - elif isinstance(v, Header): - # Header instances know what to do - print >> self._fp, v.encode() - elif _is8bitstring(v): - # If we have raw 8bit data in a byte string, we have no idea - # what the encoding is. There is no safe way to split this - # string. If it's ascii-subset, then we could do a normal - # ascii split, but if it's multibyte then we could break the - # string. There's no way to know so the least harm seems to - # be to not split the string and risk it being too long. - print >> self._fp, v - else: - # Header's got lots of smarts, so use it. - print >> self._fp, Header( - v, maxlinelen=self._maxheaderlen, - header_name=h, continuation_ws='\t').encode() - # A blank line always separates headers from body - print >> self._fp - - # - # Handlers for writing types and subtypes - # - - def _handle_text(self, msg): - payload = msg.get_payload() - if payload is None: - return - if not isinstance(payload, basestring): - raise TypeError('string payload expected: %s' % type(payload)) - if self._mangle_from_: - payload = fcre.sub('>From ', payload) - self._fp.write(payload) - - # Default body handler - _writeBody = _handle_text - - def _handle_multipart(self, msg): - # The trick here is to write out each part separately, merge them all - # together, and then make sure that the boundary we've chosen isn't - # present in the payload. - msgtexts = [] - subparts = msg.get_payload() - if subparts is None: - subparts = [] - elif isinstance(subparts, basestring): - # e.g. a non-strict parse of a message with no starting boundary. - self._fp.write(subparts) - return - elif not isinstance(subparts, list): - # Scalar payload - subparts = [subparts] - for part in subparts: - s = StringIO() - g = self.clone(s) - g.flatten(part, unixfrom=False) - msgtexts.append(s.getvalue()) - # Now make sure the boundary we've selected doesn't appear in any of - # the message texts. - alltext = NL.join(msgtexts) - # BAW: What about boundaries that are wrapped in double-quotes? - boundary = msg.get_boundary(failobj=_make_boundary(alltext)) - # If we had to calculate a new boundary because the body text - # contained that string, set the new boundary. We don't do it - # unconditionally because, while set_boundary() preserves order, it - # doesn't preserve newlines/continuations in headers. This is no big - # deal in practice, but turns out to be inconvenient for the unittest - # suite. - if msg.get_boundary() <> boundary: - msg.set_boundary(boundary) - # If there's a preamble, write it out, with a trailing CRLF - if msg.preamble is not None: - print >> self._fp, msg.preamble - # dash-boundary transport-padding CRLF - print >> self._fp, '--' + boundary - # body-part - if msgtexts: - self._fp.write(msgtexts.pop(0)) - # *encapsulation - # --> delimiter transport-padding - # --> CRLF body-part - for body_part in msgtexts: - # delimiter transport-padding CRLF - print >> self._fp, '\n--' + boundary - # body-part - self._fp.write(body_part) - # close-delimiter transport-padding - self._fp.write('\n--' + boundary + '--') - if msg.epilogue is not None: - print >> self._fp - self._fp.write(msg.epilogue) - - def _handle_message_delivery_status(self, msg): - # We can't just write the headers directly to self's file object - # because this will leave an extra newline between the last header - # block and the boundary. Sigh. - blocks = [] - for part in msg.get_payload(): - s = StringIO() - g = self.clone(s) - g.flatten(part, unixfrom=False) - text = s.getvalue() - lines = text.split('\n') - # Strip off the unnecessary trailing empty line - if lines and lines[-1] == '': - blocks.append(NL.join(lines[:-1])) - else: - blocks.append(text) - # Now join all the blocks with an empty line. This has the lovely - # effect of separating each block with an empty line, but not adding - # an extra one after the last one. - self._fp.write(NL.join(blocks)) - - def _handle_message(self, msg): - s = StringIO() - g = self.clone(s) - # The payload of a message/rfc822 part should be a multipart sequence - # of length 1. The zeroth element of the list should be the Message - # object for the subpart. Extract that object, stringify it, and - # write it out. - g.flatten(msg.get_payload(0), unixfrom=False) - self._fp.write(s.getvalue()) - - - -_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' - -class DecodedGenerator(Generator): - """Generator a text representation of a message. - - Like the Generator base class, except that non-text parts are substituted - with a format string representing the part. - """ - def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None): - """Like Generator.__init__() except that an additional optional - argument is allowed. - - Walks through all subparts of a message. If the subpart is of main - type `text', then it prints the decoded payload of the subpart. - - Otherwise, fmt is a format string that is used instead of the message - payload. fmt is expanded with the following keywords (in - %(keyword)s format): - - type : Full MIME type of the non-text part - maintype : Main MIME type of the non-text part - subtype : Sub-MIME type of the non-text part - filename : Filename of the non-text part - description: Description associated with the non-text part - encoding : Content transfer encoding of the non-text part - - The default value for fmt is None, meaning - - [Non-text (%(type)s) part of message omitted, filename %(filename)s] - """ - Generator.__init__(self, outfp, mangle_from_, maxheaderlen) - if fmt is None: - self._fmt = _FMT - else: - self._fmt = fmt - - def _dispatch(self, msg): - for part in msg.walk(): - maintype = part.get_content_maintype() - if maintype == 'text': - print >> self, part.get_payload(decode=True) - elif maintype == 'multipart': - # Just skip this - pass - else: - print >> self, self._fmt % { - 'type' : part.get_content_type(), - 'maintype' : part.get_content_maintype(), - 'subtype' : part.get_content_subtype(), - 'filename' : part.get_filename('[no filename]'), - 'description': part.get('Content-Description', - '[no description]'), - 'encoding' : part.get('Content-Transfer-Encoding', - '[no encoding]'), - } - - - -# Helper -_width = len(repr(sys.maxint-1)) -_fmt = '%%0%dd' % _width - -def _make_boundary(text=None): - # Craft a random boundary. If text is given, ensure that the chosen - # boundary doesn't appear in the text. - token = random.randrange(sys.maxint) - boundary = ('=' * 15) + (_fmt % token) + '==' - if text is None: - return boundary - b = boundary - counter = 0 - while True: - cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE) - if not cre.search(text): - break - b = boundary + '.' + str(counter) - counter += 1 - return b diff --git a/Lib/email/Header.py b/Lib/email/Header.py deleted file mode 100644 index 5e24afe..0000000 --- a/Lib/email/Header.py +++ /dev/null @@ -1,495 +0,0 @@ -# Copyright (C) 2002-2004 Python Software Foundation -# Author: Ben Gertzfield, Barry Warsaw -# Contact: email-sig@python.org - -"""Header encoding and decoding functionality.""" - -import re -import binascii - -import email.quopriMIME -import email.base64MIME -from email.Errors import HeaderParseError -from email.Charset import Charset - -NL = '\n' -SPACE = ' ' -USPACE = u' ' -SPACE8 = ' ' * 8 -UEMPTYSTRING = u'' - -MAXLINELEN = 76 - -USASCII = Charset('us-ascii') -UTF8 = Charset('utf-8') - -# Match encoded-word strings in the form =?charset?q?Hello_World?= -ecre = re.compile(r''' - =\? # literal =? - (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset - \? # literal ? - (?P<encoding>[qb]) # either a "q" or a "b", case insensitive - \? # literal ? - (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string - \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE) - -# Field name regexp, including trailing colon, but not separating whitespace, -# according to RFC 2822. Character range is from tilde to exclamation mark. -# For use with .match() -fcre = re.compile(r'[\041-\176]+:$') - - - -# Helpers -_max_append = email.quopriMIME._max_append - - - -def decode_header(header): - """Decode a message header value without converting charset. - - Returns a list of (decoded_string, charset) pairs containing each of the - decoded parts of the header. Charset is None for non-encoded parts of the - header, otherwise a lower-case string containing the name of the character - set specified in the encoded string. - - An email.Errors.HeaderParseError may be raised when certain decoding error - occurs (e.g. a base64 decoding exception). - """ - # If no encoding, just return the header - header = str(header) - if not ecre.search(header): - return [(header, None)] - decoded = [] - dec = '' - for line in header.splitlines(): - # This line might not have an encoding in it - if not ecre.search(line): - decoded.append((line, None)) - continue - parts = ecre.split(line) - while parts: - unenc = parts.pop(0).strip() - if unenc: - # Should we continue a long line? - if decoded and decoded[-1][1] is None: - decoded[-1] = (decoded[-1][0] + SPACE + unenc, None) - else: - decoded.append((unenc, None)) - if parts: - charset, encoding = [s.lower() for s in parts[0:2]] - encoded = parts[2] - dec = None - if encoding == 'q': - dec = email.quopriMIME.header_decode(encoded) - elif encoding == 'b': - try: - dec = email.base64MIME.decode(encoded) - except binascii.Error: - # Turn this into a higher level exception. BAW: Right - # now we throw the lower level exception away but - # when/if we get exception chaining, we'll preserve it. - raise HeaderParseError - if dec is None: - dec = encoded - - if decoded and decoded[-1][1] == charset: - decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1]) - else: - decoded.append((dec, charset)) - del parts[0:3] - return decoded - - - -def make_header(decoded_seq, maxlinelen=None, header_name=None, - continuation_ws=' '): - """Create a Header from a sequence of pairs as returned by decode_header() - - decode_header() takes a header value string and returns a sequence of - pairs of the format (decoded_string, charset) where charset is the string - name of the character set. - - This function takes one of those sequence of pairs and returns a Header - instance. Optional maxlinelen, header_name, and continuation_ws are as in - the Header constructor. - """ - h = Header(maxlinelen=maxlinelen, header_name=header_name, - continuation_ws=continuation_ws) - for s, charset in decoded_seq: - # None means us-ascii but we can simply pass it on to h.append() - if charset is not None and not isinstance(charset, Charset): - charset = Charset(charset) - h.append(s, charset) - return h - - - -class Header: - def __init__(self, s=None, charset=None, - maxlinelen=None, header_name=None, - continuation_ws=' ', errors='strict'): - """Create a MIME-compliant header that can contain many character sets. - - Optional s is the initial header value. If None, the initial header - value is not set. You can later append to the header with .append() - method calls. s may be a byte string or a Unicode string, but see the - .append() documentation for semantics. - - Optional charset serves two purposes: it has the same meaning as the - charset argument to the .append() method. It also sets the default - character set for all subsequent .append() calls that omit the charset - argument. If charset is not provided in the constructor, the us-ascii - charset is used both as s's initial charset and as the default for - subsequent .append() calls. - - The maximum line length can be specified explicit via maxlinelen. For - splitting the first line to a shorter value (to account for the field - header which isn't included in s, e.g. `Subject') pass in the name of - the field in header_name. The default maxlinelen is 76. - - continuation_ws must be RFC 2822 compliant folding whitespace (usually - either a space or a hard tab) which will be prepended to continuation - lines. - - errors is passed through to the .append() call. - """ - if charset is None: - charset = USASCII - if not isinstance(charset, Charset): - charset = Charset(charset) - self._charset = charset - self._continuation_ws = continuation_ws - cws_expanded_len = len(continuation_ws.replace('\t', SPACE8)) - # BAW: I believe `chunks' and `maxlinelen' should be non-public. - self._chunks = [] - if s is not None: - self.append(s, charset, errors) - if maxlinelen is None: - maxlinelen = MAXLINELEN - if header_name is None: - # We don't know anything about the field header so the first line - # is the same length as subsequent lines. - self._firstlinelen = maxlinelen - else: - # The first line should be shorter to take into account the field - # header. Also subtract off 2 extra for the colon and space. - self._firstlinelen = maxlinelen - len(header_name) - 2 - # Second and subsequent lines should subtract off the length in - # columns of the continuation whitespace prefix. - self._maxlinelen = maxlinelen - cws_expanded_len - - def __str__(self): - """A synonym for self.encode().""" - return self.encode() - - def __unicode__(self): - """Helper for the built-in unicode function.""" - uchunks = [] - lastcs = None - for s, charset in self._chunks: - # We must preserve spaces between encoded and non-encoded word - # boundaries, which means for us we need to add a space when we go - # from a charset to None/us-ascii, or from None/us-ascii to a - # charset. Only do this for the second and subsequent chunks. - nextcs = charset - if uchunks: - if lastcs not in (None, 'us-ascii'): - if nextcs in (None, 'us-ascii'): - uchunks.append(USPACE) - nextcs = None - elif nextcs not in (None, 'us-ascii'): - uchunks.append(USPACE) - lastcs = nextcs - uchunks.append(unicode(s, str(charset))) - return UEMPTYSTRING.join(uchunks) - - # Rich comparison operators for equality only. BAW: does it make sense to - # have or explicitly disable <, <=, >, >= operators? - def __eq__(self, other): - # other may be a Header or a string. Both are fine so coerce - # ourselves to a string, swap the args and do another comparison. - return other == self.encode() - - def __ne__(self, other): - return not self == other - - def append(self, s, charset=None, errors='strict'): - """Append a string to the MIME header. - - Optional charset, if given, should be a Charset instance or the name - of a character set (which will be converted to a Charset instance). A - value of None (the default) means that the charset given in the - constructor is used. - - s may be a byte string or a Unicode string. If it is a byte string - (i.e. isinstance(s, str) is true), then charset is the encoding of - that byte string, and a UnicodeError will be raised if the string - cannot be decoded with that charset. If s is a Unicode string, then - charset is a hint specifying the character set of the characters in - the string. In this case, when producing an RFC 2822 compliant header - using RFC 2047 rules, the Unicode string will be encoded using the - following charsets in order: us-ascii, the charset hint, utf-8. The - first character set not to provoke a UnicodeError is used. - - Optional `errors' is passed as the third argument to any unicode() or - ustr.encode() call. - """ - if charset is None: - charset = self._charset - elif not isinstance(charset, Charset): - charset = Charset(charset) - # If the charset is our faux 8bit charset, leave the string unchanged - if charset <> '8bit': - # We need to test that the string can be converted to unicode and - # back to a byte string, given the input and output codecs of the - # charset. - if isinstance(s, str): - # Possibly raise UnicodeError if the byte string can't be - # converted to a unicode with the input codec of the charset. - incodec = charset.input_codec or 'us-ascii' - ustr = unicode(s, incodec, errors) - # Now make sure that the unicode could be converted back to a - # byte string with the output codec, which may be different - # than the iput coded. Still, use the original byte string. - outcodec = charset.output_codec or 'us-ascii' - ustr.encode(outcodec, errors) - elif isinstance(s, unicode): - # Now we have to be sure the unicode string can be converted - # to a byte string with a reasonable output codec. We want to - # use the byte string in the chunk. - for charset in USASCII, charset, UTF8: - try: - outcodec = charset.output_codec or 'us-ascii' - s = s.encode(outcodec, errors) - break - except UnicodeError: - pass - else: - assert False, 'utf-8 conversion failed' - self._chunks.append((s, charset)) - - def _split(self, s, charset, maxlinelen, splitchars): - # Split up a header safely for use with encode_chunks. - splittable = charset.to_splittable(s) - encoded = charset.from_splittable(splittable, True) - elen = charset.encoded_header_len(encoded) - # If the line's encoded length first, just return it - if elen <= maxlinelen: - return [(encoded, charset)] - # If we have undetermined raw 8bit characters sitting in a byte - # string, we really don't know what the right thing to do is. We - # can't really split it because it might be multibyte data which we - # could break if we split it between pairs. The least harm seems to - # be to not split the header at all, but that means they could go out - # longer than maxlinelen. - if charset == '8bit': - return [(s, charset)] - # BAW: I'm not sure what the right test here is. What we're trying to - # do is be faithful to RFC 2822's recommendation that ($2.2.3): - # - # "Note: Though structured field bodies are defined in such a way that - # folding can take place between many of the lexical tokens (and even - # within some of the lexical tokens), folding SHOULD be limited to - # placing the CRLF at higher-level syntactic breaks." - # - # For now, I can only imagine doing this when the charset is us-ascii, - # although it's possible that other charsets may also benefit from the - # higher-level syntactic breaks. - elif charset == 'us-ascii': - return self._split_ascii(s, charset, maxlinelen, splitchars) - # BAW: should we use encoded? - elif elen == len(s): - # We can split on _maxlinelen boundaries because we know that the - # encoding won't change the size of the string - splitpnt = maxlinelen - first = charset.from_splittable(splittable[:splitpnt], False) - last = charset.from_splittable(splittable[splitpnt:], False) - else: - # Binary search for split point - first, last = _binsplit(splittable, charset, maxlinelen) - # first is of the proper length so just wrap it in the appropriate - # chrome. last must be recursively split. - fsplittable = charset.to_splittable(first) - fencoded = charset.from_splittable(fsplittable, True) - chunk = [(fencoded, charset)] - return chunk + self._split(last, charset, self._maxlinelen, splitchars) - - def _split_ascii(self, s, charset, firstlen, splitchars): - chunks = _split_ascii(s, firstlen, self._maxlinelen, - self._continuation_ws, splitchars) - return zip(chunks, [charset]*len(chunks)) - - def _encode_chunks(self, newchunks, maxlinelen): - # MIME-encode a header with many different charsets and/or encodings. - # - # Given a list of pairs (string, charset), return a MIME-encoded - # string suitable for use in a header field. Each pair may have - # different charsets and/or encodings, and the resulting header will - # accurately reflect each setting. - # - # Each encoding can be email.Utils.QP (quoted-printable, for - # ASCII-like character sets like iso-8859-1), email.Utils.BASE64 - # (Base64, for non-ASCII like character sets like KOI8-R and - # iso-2022-jp), or None (no encoding). - # - # Each pair will be represented on a separate line; the resulting - # string will be in the format: - # - # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n - # =?charset2?b?SvxyZ2VuIEL2aW5n?=" - chunks = [] - for header, charset in newchunks: - if not header: - continue - if charset is None or charset.header_encoding is None: - s = header - else: - s = charset.header_encode(header) - # Don't add more folding whitespace than necessary - if chunks and chunks[-1].endswith(' '): - extra = '' - else: - extra = ' ' - _max_append(chunks, s, maxlinelen, extra) - joiner = NL + self._continuation_ws - return joiner.join(chunks) - - def encode(self, splitchars=';, '): - """Encode a message header into an RFC-compliant format. - - There are many issues involved in converting a given string for use in - an email header. Only certain character sets are readable in most - email clients, and as header strings can only contain a subset of - 7-bit ASCII, care must be taken to properly convert and encode (with - Base64 or quoted-printable) header strings. In addition, there is a - 75-character length limit on any given encoded header field, so - line-wrapping must be performed, even with double-byte character sets. - - This method will do its best to convert the string to the correct - character set used in email, and encode and line wrap it safely with - the appropriate scheme for that character set. - - If the given charset is not known or an error occurs during - conversion, this function will return the header untouched. - - Optional splitchars is a string containing characters to split long - ASCII lines on, in rough support of RFC 2822's `highest level - syntactic breaks'. This doesn't affect RFC 2047 encoded lines. - """ - newchunks = [] - maxlinelen = self._firstlinelen - lastlen = 0 - for s, charset in self._chunks: - # The first bit of the next chunk should be just long enough to - # fill the next line. Don't forget the space separating the - # encoded words. - targetlen = maxlinelen - lastlen - 1 - if targetlen < charset.encoded_header_len(''): - # Stick it on the next line - targetlen = maxlinelen - newchunks += self._split(s, charset, targetlen, splitchars) - lastchunk, lastcharset = newchunks[-1] - lastlen = lastcharset.encoded_header_len(lastchunk) - return self._encode_chunks(newchunks, maxlinelen) - - - -def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars): - lines = [] - maxlen = firstlen - for line in s.splitlines(): - # Ignore any leading whitespace (i.e. continuation whitespace) already - # on the line, since we'll be adding our own. - line = line.lstrip() - if len(line) < maxlen: - lines.append(line) - maxlen = restlen - continue - # Attempt to split the line at the highest-level syntactic break - # possible. Note that we don't have a lot of smarts about field - # syntax; we just try to break on semi-colons, then commas, then - # whitespace. - for ch in splitchars: - if ch in line: - break - else: - # There's nothing useful to split the line on, not even spaces, so - # just append this line unchanged - lines.append(line) - maxlen = restlen - continue - # Now split the line on the character plus trailing whitespace - cre = re.compile(r'%s\s*' % ch) - if ch in ';,': - eol = ch - else: - eol = '' - joiner = eol + ' ' - joinlen = len(joiner) - wslen = len(continuation_ws.replace('\t', SPACE8)) - this = [] - linelen = 0 - for part in cre.split(line): - curlen = linelen + max(0, len(this)-1) * joinlen - partlen = len(part) - onfirstline = not lines - # We don't want to split after the field name, if we're on the - # first line and the field name is present in the header string. - if ch == ' ' and onfirstline and \ - len(this) == 1 and fcre.match(this[0]): - this.append(part) - linelen += partlen - elif curlen + partlen > maxlen: - if this: - lines.append(joiner.join(this) + eol) - # If this part is longer than maxlen and we aren't already - # splitting on whitespace, try to recursively split this line - # on whitespace. - if partlen > maxlen and ch <> ' ': - subl = _split_ascii(part, maxlen, restlen, - continuation_ws, ' ') - lines.extend(subl[:-1]) - this = [subl[-1]] - else: - this = [part] - linelen = wslen + len(this[-1]) - maxlen = restlen - else: - this.append(part) - linelen += partlen - # Put any left over parts on a line by themselves - if this: - lines.append(joiner.join(this)) - return lines - - - -def _binsplit(splittable, charset, maxlinelen): - i = 0 - j = len(splittable) - while i < j: - # Invariants: - # 1. splittable[:k] fits for all k <= i (note that we *assume*, - # at the start, that splittable[:0] fits). - # 2. splittable[:k] does not fit for any k > j (at the start, - # this means we shouldn't look at any k > len(splittable)). - # 3. We don't know about splittable[:k] for k in i+1..j. - # 4. We want to set i to the largest k that fits, with i <= k <= j. - # - m = (i+j+1) >> 1 # ceiling((i+j)/2); i < m <= j - chunk = charset.from_splittable(splittable[:m], True) - chunklen = charset.encoded_header_len(chunk) - if chunklen <= maxlinelen: - # m is acceptable, so is a new lower bound. - i = m - else: - # m is not acceptable, so final i must be < m. - j = m - 1 - # i == j. Invariant #1 implies that splittable[:i] fits, and - # invariant #2 implies that splittable[:i+1] does not fit, so i - # is what we're looking for. - first = charset.from_splittable(splittable[:i], False) - last = charset.from_splittable(splittable[i:], False) - return first, last diff --git a/Lib/email/Iterators.py b/Lib/email/Iterators.py deleted file mode 100644 index 74a93c7..0000000 --- a/Lib/email/Iterators.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (C) 2001-2004 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Various types of useful iterators and generators.""" - -import sys -from cStringIO import StringIO - - - -# This function will become a method of the Message class -def walk(self): - """Walk over the message tree, yielding each subpart. - - The walk is performed in depth-first order. This method is a - generator. - """ - yield self - if self.is_multipart(): - for subpart in self.get_payload(): - for subsubpart in subpart.walk(): - yield subsubpart - - - -# These two functions are imported into the Iterators.py interface module. -# The Python 2.2 version uses generators for efficiency. -def body_line_iterator(msg, decode=False): - """Iterate over the parts, returning string payloads line-by-line. - - Optional decode (default False) is passed through to .get_payload(). - """ - for subpart in msg.walk(): - payload = subpart.get_payload(decode=decode) - if isinstance(payload, basestring): - for line in StringIO(payload): - yield line - - -def typed_subpart_iterator(msg, maintype='text', subtype=None): - """Iterate over the subparts with a given MIME type. - - Use `maintype' as the main MIME type to match against; this defaults to - "text". Optional `subtype' is the MIME subtype to match against; if - omitted, only the main type is matched. - """ - for subpart in msg.walk(): - if subpart.get_content_maintype() == maintype: - if subtype is None or subpart.get_content_subtype() == subtype: - yield subpart - - - -def _structure(msg, fp=None, level=0, include_default=False): - """A handy debugging aid""" - if fp is None: - fp = sys.stdout - tab = ' ' * (level * 4) - print >> fp, tab + msg.get_content_type(), - if include_default: - print >> fp, '[%s]' % msg.get_default_type() - else: - print >> fp - if msg.is_multipart(): - for subpart in msg.get_payload(): - _structure(subpart, fp, level+1, include_default) diff --git a/Lib/email/MIMEAudio.py b/Lib/email/MIMEAudio.py deleted file mode 100644 index 266ec4c..0000000 --- a/Lib/email/MIMEAudio.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2001-2004 Python Software Foundation -# Author: Anthony Baxter -# Contact: email-sig@python.org - -"""Class representing audio/* type MIME documents.""" - -import sndhdr -from cStringIO import StringIO - -from email import Errors -from email import Encoders -from email.MIMENonMultipart import MIMENonMultipart - - - -_sndhdr_MIMEmap = {'au' : 'basic', - 'wav' :'x-wav', - 'aiff':'x-aiff', - 'aifc':'x-aiff', - } - -# There are others in sndhdr that don't have MIME types. :( -# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? -def _whatsnd(data): - """Try to identify a sound file type. - - sndhdr.what() has a pretty cruddy interface, unfortunately. This is why - we re-do it here. It would be easier to reverse engineer the Unix 'file' - command and use the standard 'magic' file, as shipped with a modern Unix. - """ - hdr = data[:512] - fakefile = StringIO(hdr) - for testfn in sndhdr.tests: - res = testfn(hdr, fakefile) - if res is not None: - return _sndhdr_MIMEmap.get(res[0]) - return None - - - -class MIMEAudio(MIMENonMultipart): - """Class for generating audio/* MIME documents.""" - - def __init__(self, _audiodata, _subtype=None, - _encoder=Encoders.encode_base64, **_params): - """Create an audio/* type MIME document. - - _audiodata is a string containing the raw audio data. If this data - can be decoded by the standard Python `sndhdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific audio subtype via the - _subtype parameter. If _subtype is not given, and no subtype can be - guessed, a TypeError is raised. - - _encoder is a function which will perform the actual encoding for - transport of the image data. It takes one argument, which is this - Image instance. It should use get_payload() and set_payload() to - change the payload to the encoded form. It should also add any - Content-Transfer-Encoding or other headers to the message as - necessary. The default encoding is Base64. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. - """ - if _subtype is None: - _subtype = _whatsnd(_audiodata) - if _subtype is None: - raise TypeError('Could not find audio MIME subtype') - MIMENonMultipart.__init__(self, 'audio', _subtype, **_params) - self.set_payload(_audiodata) - _encoder(self) diff --git a/Lib/email/MIMEBase.py b/Lib/email/MIMEBase.py deleted file mode 100644 index 88691f8..0000000 --- a/Lib/email/MIMEBase.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (C) 2001-2004 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Base class for MIME specializations.""" - -from email import Message - - - -class MIMEBase(Message.Message): - """Base class for MIME specializations.""" - - def __init__(self, _maintype, _subtype, **_params): - """This constructor adds a Content-Type: and a MIME-Version: header. - - The Content-Type: header is taken from the _maintype and _subtype - arguments. Additional parameters for this header are taken from the - keyword arguments. - """ - Message.Message.__init__(self) - ctype = '%s/%s' % (_maintype, _subtype) - self.add_header('Content-Type', ctype, **_params) - self['MIME-Version'] = '1.0' diff --git a/Lib/email/MIMEImage.py b/Lib/email/MIMEImage.py deleted file mode 100644 index a658067..0000000 --- a/Lib/email/MIMEImage.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2001-2004 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Class representing image/* type MIME documents.""" - -import imghdr - -from email import Errors -from email import Encoders -from email.MIMENonMultipart import MIMENonMultipart - - - -class MIMEImage(MIMENonMultipart): - """Class for generating image/* type MIME documents.""" - - def __init__(self, _imagedata, _subtype=None, - _encoder=Encoders.encode_base64, **_params): - """Create an image/* type MIME document. - - _imagedata is a string containing the raw image data. If this data - can be decoded by the standard Python `imghdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific image subtype via the _subtype - parameter. - - _encoder is a function which will perform the actual encoding for - transport of the image data. It takes one argument, which is this - Image instance. It should use get_payload() and set_payload() to - change the payload to the encoded form. It should also add any - Content-Transfer-Encoding or other headers to the message as - necessary. The default encoding is Base64. - - Any additional keyword arguments are passed to the base class - constructor, which turns them into parameters on the Content-Type - header. - """ - if _subtype is None: - _subtype = imghdr.what(None, _imagedata) - if _subtype is None: - raise TypeError('Could not guess image MIME subtype') - MIMENonMultipart.__init__(self, 'image', _subtype, **_params) - self.set_payload(_imagedata) - _encoder(self) diff --git a/Lib/email/MIMEMessage.py b/Lib/email/MIMEMessage.py deleted file mode 100644 index 3021934..0000000 --- a/Lib/email/MIMEMessage.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (C) 2001-2004 Python Software Foundation -# Author: Barry Warsaw -# Contact: email-sig@python.org - -"""Class representing message/* MIME documents.""" - -from email import Message -from email.MIMENonMultipart import MIMENonMultipart - - - -class MIMEMessage(MIMENonMultipart): - """Class representing message/* MIME documents.""" - - def __init__(self, _msg, _subtype='rfc822'): - """Create a message/* type MIME document. - - _msg is a message object and must be an instance of Message, or a - derived class of Message, otherwise a TypeError is raised. - - Optional _subtype defines the subtype of the contained message.